@@ -1,6 +1,6 @@
 import type { InferenceSnippet, ModelDataMinimal } from "./types";
 import { describe, expect, it } from "vitest";
-import { snippetTextGeneration } from "./js";
+import { getJsInferenceSnippet } from "./js";
 
 describe("inference API snippets", () => {
 	it("conversational llm", async () => {
@@ -10,7 +10,7 @@ describe("inference API snippets", () => {
 		tags: ["conversational"],
 		inference: "",
 	};
-	const snippet = snippetTextGeneration(model, "api_token") as InferenceSnippet[];
+	const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];
 
 	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
 
@@ -38,14 +38,41 @@ for await (const chunk of stream) {
 }`);
 	});
 
+	it("conversational llm non-streaming", async () => {
+		const model: ModelDataMinimal = {
+			id: "meta-llama/Llama-3.1-8B-Instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = getJsInferenceSnippet(model, "api_token", { streaming: false }) as InferenceSnippet[];
+
+		expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+
+const client = new HfInference("api_token")
+
+const chatCompletion = await client.chatCompletion({
+	model: "meta-llama/Llama-3.1-8B-Instruct",
+	messages: [
+		{
+			role: "user",
+			content: "What is the capital of France?"
+		}
+	],
+	max_tokens: 500
+});
+
+console.log(chatCompletion.choices[0].message);`);
+	});
+
 	it("conversational vlm", async () => {
 		const model: ModelDataMinimal = {
 			id: "meta-llama/Llama-3.2-11B-Vision-Instruct",
 			pipeline_tag: "image-text-to-text",
 			tags: ["conversational"],
 			inference: "",
 		};
-		const snippet = snippetTextGeneration(model, "api_token") as InferenceSnippet[];
+		const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];
 
 		expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
 
@@ -75,6 +102,41 @@ const stream = client.chatCompletionStream({
 	max_tokens: 500
 });
 
+for await (const chunk of stream) {
+	if (chunk.choices && chunk.choices.length > 0) {
+		const newContent = chunk.choices[0].delta.content;
+		out += newContent;
+		console.log(newContent);
+	}
+}`);
+	});
+
+	it("conversational llm", async () => {
+		const model: ModelDataMinimal = {
+			id: "meta-llama/Llama-3.1-8B-Instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];
+
+		expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+
+const client = new HfInference("api_token")
+
+let out = "";
+
+const stream = client.chatCompletionStream({
+	model: "meta-llama/Llama-3.1-8B-Instruct",
+	messages: [
+		{
+			role: "user",
+			content: "What is the capital of France?"
+		}
+	],
+	max_tokens: 500
+});
+
 for await (const chunk of stream) {
 	if (chunk.choices && chunk.choices.length > 0) {
 		const newContent = chunk.choices[0].delta.content;
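For reference, a minimal usage sketch of the renamed helper, assuming only the call signatures exercised by the tests above (the two-argument call for the default streaming snippet and the { streaming: false } option for the non-streaming one); the model object and the "api_token" placeholder are copied straight from the test fixtures:

import type { InferenceSnippet, ModelDataMinimal } from "./types";
import { getJsInferenceSnippet } from "./js";

// Model description copied from the test fixtures above.
const model: ModelDataMinimal = {
	id: "meta-llama/Llama-3.1-8B-Instruct",
	pipeline_tag: "text-generation",
	tags: ["conversational"],
	inference: "",
};

// Default call: per the test expectations, yields the streaming snippet
// built around client.chatCompletionStream.
const streamingSnippets = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];
console.log(streamingSnippets[0]?.content);

// With { streaming: false }: per the test expectations, yields the
// non-streaming snippet built around client.chatCompletion.
const nonStreamingSnippets = getJsInferenceSnippet(model, "api_token", { streaming: false }) as InferenceSnippet[];
console.log(nonStreamingSnippets[0]?.content);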