
Commit bac80ab

Merge branch 'main' into add-workflow-update-specs
2 parents 330d297 + 72b0d9b

File tree

12 files changed: +258 -25 lines

README.md

Lines changed: 1 addition & 1 deletion

@@ -93,7 +93,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 ```html
 <script type="module">
   import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm';
-  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.19.0/+esm";
+  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.20.0/+esm";
 </script>
 ```

packages/hub/package.json

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 {
 	"name": "@huggingface/hub",
 	"packageManager": "[email protected]",
-	"version": "0.19.0",
+	"version": "0.20.0",
 	"description": "Utilities to interact with the Hugging Face hub",
 	"repository": "https://github.com/huggingface/huggingface.js.git",
 	"publishConfig": {

packages/hub/src/lib/index.ts

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@ export * from "./model-info";
 export * from "./oauth-handle-redirect";
 export * from "./oauth-login-url";
 export * from "./parse-safetensors-metadata";
+export * from "./paths-info";
 export * from "./space-info";
 export * from "./upload-file";
 export * from "./upload-files";

packages/hub/src/lib/paths-info.spec.ts

Lines changed: 75 additions & 0 deletions

@@ -0,0 +1,75 @@
+import { expect, it, describe } from "vitest";
+import type { CommitInfo, PathInfo, SecurityFileStatus } from "./paths-info";
+import { pathsInfo } from "./paths-info";
+
+describe("pathsInfo", () => {
+	it("should fetch LFS path info", async () => {
+		const result: PathInfo[] = await pathsInfo({
+			repo: {
+				name: "bert-base-uncased",
+				type: "model",
+			},
+			paths: ["tf_model.h5"],
+			revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+		});
+
+		expect(result).toHaveLength(1);
+
+		const modelPathInfo = result[0];
+		expect(modelPathInfo.path).toBe("tf_model.h5");
+		expect(modelPathInfo.type).toBe("file");
+		// LFS pointer, therefore lfs should be defined
+		expect(modelPathInfo?.lfs).toBeDefined();
+		expect(modelPathInfo?.lfs?.oid).toBe("a7a17d6d844b5de815ccab5f42cad6d24496db3850a2a43d8258221018ce87d2");
+		expect(modelPathInfo?.lfs?.size).toBe(536063208);
+		expect(modelPathInfo?.lfs?.pointerSize).toBe(134);
+
+		// should not include expand info
+		expect(modelPathInfo.lastCommit).toBeUndefined();
+		expect(modelPathInfo.securityFileStatus).toBeUndefined();
+	});
+
+	it("expand params should fetch lastCommit and securityFileStatus", async () => {
+		const result: (PathInfo & {
+			lastCommit: CommitInfo,
+			securityFileStatus: SecurityFileStatus,
+		})[] = await pathsInfo({
+			repo: {
+				name: "bert-base-uncased",
+				type: "model",
+			},
+			paths: ["tf_model.h5"],
+			revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+			expand: true, // include lastCommit and securityFileStatus in the response
+		});
+
+		expect(result).toHaveLength(1);
+
+		const modelPathInfo = result[0];
+
+		// should include expand info
+		expect(modelPathInfo.lastCommit).toBeDefined();
+		expect(modelPathInfo.securityFileStatus).toBeDefined();
+
+		expect(modelPathInfo.lastCommit.id).toBe("dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7");
+		expect(modelPathInfo.lastCommit.title).toBe("Update tf_model.h5");
+		expect(modelPathInfo.lastCommit.date.getTime()).toBe(1569268124000); // 2019-09-23T19:48:44.000Z
+	});
+
+	it("non-LFS pointer should have lfs undefined", async () => {
+		const result: PathInfo[] = await pathsInfo({
+			repo: {
+				name: "bert-base-uncased",
+				type: "model",
+			},
+			paths: ["config.json"],
+			revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+		});
+
+		expect(result).toHaveLength(1);
+
+		const modelPathInfo = result[0];
+		expect(modelPathInfo.path).toBe("config.json");
+		expect(modelPathInfo.lfs).toBeUndefined();
+	});
+});

packages/hub/src/lib/paths-info.ts

Lines changed: 120 additions & 0 deletions

@@ -0,0 +1,120 @@
+import type { CredentialsParams, RepoDesignation } from "../types/public";
+import { checkCredentials } from "../utils/checkCredentials";
+import { toRepoId } from "../utils/toRepoId";
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+
+export interface LfsPathInfo {
+	"oid": string,
+	"size": number,
+	"pointerSize": number
+}
+
+export interface CommitInfo {
+	"id": string,
+	"title": string,
+	"date": Date,
+}
+
+export interface SecurityFileStatus {
+	"status": string,
+}
+
+export interface PathInfo {
+	path: string,
+	type: string,
+	oid: string,
+	size: number,
+	/**
+	 * Only defined when path is LFS pointer
+	 */
+	lfs?: LfsPathInfo,
+	lastCommit?: CommitInfo,
+	securityFileStatus?: SecurityFileStatus
+}
+
+// Define the overloaded signatures
+export function pathsInfo(
+	params: {
+		repo: RepoDesignation;
+		paths: string[];
+		expand: true; // when expand is true, lastCommit and securityFileStatus are included
+		revision?: string;
+		hubUrl?: string;
+		/**
+		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+		 */
+		fetch?: typeof fetch;
+	} & Partial<CredentialsParams>
+): Promise<(PathInfo & { lastCommit: CommitInfo, securityFileStatus: SecurityFileStatus })[]>;
+export function pathsInfo(
+	params: {
+		repo: RepoDesignation;
+		paths: string[];
+		expand?: boolean;
+		revision?: string;
+		hubUrl?: string;
+		/**
+		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+		 */
+		fetch?: typeof fetch;
+	} & Partial<CredentialsParams>
+): Promise<PathInfo[]>;
+
+export async function pathsInfo(
+	params: {
+		repo: RepoDesignation;
+		paths: string[];
+		expand?: boolean;
+		revision?: string;
+		hubUrl?: string;
+		/**
+		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+		 */
+		fetch?: typeof fetch;
+	} & Partial<CredentialsParams>
+): Promise<PathInfo[]> {
+	const accessToken = checkCredentials(params);
+	const repoId = toRepoId(params.repo);
+
+	const hubUrl = params.hubUrl ?? HUB_URL;
+
+	const url = `${hubUrl}/api/${repoId.type}s/${repoId.name}/paths-info/${encodeURIComponent(params.revision ?? "main")}`;
+
+	const resp = await (params.fetch ?? fetch)(url, {
+		method: "POST",
+		headers: {
+			...(params.credentials && {
+				Authorization: `Bearer ${accessToken}`,
+			}),
+			'Accept': 'application/json',
+			'Content-Type': 'application/json'
+		},
+		body: JSON.stringify({
+			paths: params.paths,
+			expand: params.expand,
+		}),
+	});
+
+	if (!resp.ok) {
+		throw await createApiError(resp);
+	}
+
+	const json: unknown = await resp.json();
+	if (!Array.isArray(json)) throw new Error("malformed response: expected array");
+
+	return json.map((item: PathInfo) => ({
+		path: item.path,
+		lfs: item.lfs,
+		type: item.type,
+		oid: item.oid,
+		size: item.size,
+		// expand fields
+		securityFileStatus: item.securityFileStatus,
+		lastCommit: item.lastCommit
+			? {
+					date: new Date(item.lastCommit.date),
+					title: item.lastCommit.title,
+					id: item.lastCommit.id,
+			  }
+			: undefined,
+	}));
+}
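
For context, here is a minimal sketch of how the new `pathsInfo` export could be called from `@huggingface/hub`; the repo, paths, and revision values below are illustrative (borrowed from the spec above) and the snippet itself is not part of this commit:

```ts
import { pathsInfo } from "@huggingface/hub";

// Illustrative values only, taken from the tests above.
const entries = await pathsInfo({
	repo: { type: "model", name: "bert-base-uncased" },
	paths: ["config.json", "tf_model.h5"],
	revision: "main",
	expand: true, // also return lastCommit and securityFileStatus
});

for (const entry of entries) {
	// entry.lfs is only defined for LFS-tracked files (e.g. tf_model.h5).
	console.log(entry.path, entry.size, entry.lfs?.oid, entry.lastCommit.title);
}
```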

packages/tasks/.prettierignore

Lines changed: 2 additions & 1 deletion

@@ -1,4 +1,5 @@
 pnpm-lock.yaml
 # In order to avoid code samples to have tabs, they don't display well on npm
 README.md
-dist
+dist
+.tshy

packages/tasks/package.json

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 {
 	"name": "@huggingface/tasks",
 	"packageManager": "[email protected]",
-	"version": "0.13.1",
+	"version": "0.13.2",
 	"description": "List of ML tasks for huggingface.co/tasks",
 	"repository": "https://github.com/huggingface/huggingface.js.git",
 	"publishConfig": {

packages/tasks/src/snippets/js.spec.ts

Lines changed: 8 additions & 8 deletions

@@ -12,9 +12,9 @@ describe("inference API snippets", () => {
 	};
 	const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];

-	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference";

-const client = new HfInference("api_token")
+const client = new HfInference("api_token");

 let out = "";
@@ -47,9 +47,9 @@ for await (const chunk of stream) {
 	};
 	const snippet = getJsInferenceSnippet(model, "api_token", { streaming: false }) as InferenceSnippet[];

-	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference";

-const client = new HfInference("api_token")
+const client = new HfInference("api_token");

 const chatCompletion = await client.chatCompletion({
 	model: "meta-llama/Llama-3.1-8B-Instruct",
@@ -74,9 +74,9 @@ console.log(chatCompletion.choices[0].message);`);
 	};
 	const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];

-	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference";

-const client = new HfInference("api_token")
+const client = new HfInference("api_token");

 let out = "";
@@ -120,9 +120,9 @@ for await (const chunk of stream) {
 	};
 	const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];

-	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference";

-const client = new HfInference("api_token")
+const client = new HfInference("api_token");

 let out = "";

packages/tasks/src/snippets/js.ts

Lines changed: 8 additions & 8 deletions

@@ -58,9 +58,9 @@ export const snippetTextGeneration = (
 	return [
 		{
 			client: "huggingface.js",
-			content: `import { HfInference } from "@huggingface/inference"
+			content: `import { HfInference } from "@huggingface/inference";

-const client = new HfInference("${accessToken || `{API_TOKEN}`}")
+const client = new HfInference("${accessToken || `{API_TOKEN}`}");

 let out = "";
@@ -80,12 +80,12 @@ for await (const chunk of stream) {
 		},
 		{
 			client: "openai",
-			content: `import { OpenAI } from "openai"
+			content: `import { OpenAI } from "openai";

 const client = new OpenAI({
 	baseURL: "https://api-inference.huggingface.co/v1/",
 	apiKey: "${accessToken || `{API_TOKEN}`}"
-})
+});

 let out = "";
@@ -109,9 +109,9 @@ for await (const chunk of stream) {
 	return [
 		{
 			client: "huggingface.js",
-			content: `import { HfInference } from "@huggingface/inference"
+			content: `import { HfInference } from "@huggingface/inference";

-const client = new HfInference("${accessToken || `{API_TOKEN}`}")
+const client = new HfInference("${accessToken || `{API_TOKEN}`}");

 const chatCompletion = await client.chatCompletion({
 	model: "${model.id}",
@@ -123,12 +123,12 @@ console.log(chatCompletion.choices[0].message);`,
 		},
 		{
 			client: "openai",
-			content: `import { OpenAI } from "openai"
+			content: `import { OpenAI } from "openai";

 const client = new OpenAI({
 	baseURL: "https://api-inference.huggingface.co/v1/",
 	apiKey: "${accessToken || `{API_TOKEN}`}"
-})
+});

 const chatCompletion = await client.chat.completions.create({
 	model: "${model.id}",
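
For context, this is the kind of non-streaming chat-completion snippet the huggingface.js template now emits, with statements terminated by semicolons; the `messages` and `max_tokens` values below are illustrative, since that part of the template is elided in this diff:

```ts
import { HfInference } from "@huggingface/inference";

const client = new HfInference("api_token");

// Illustrative request body; the real template interpolates the model id and inputs.
const chatCompletion = await client.chatCompletion({
	model: "meta-llama/Llama-3.1-8B-Instruct",
	messages: [{ role: "user", content: "What is the capital of France?" }],
	max_tokens: 500,
});

console.log(chatCompletion.choices[0].message);
```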

packages/tasks/src/tasks/automatic-speech-recognition/data.ts

Lines changed: 15 additions & 3 deletions

@@ -6,12 +6,16 @@ const taskData: TaskDataCustom = {
 			description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
 			id: "mozilla-foundation/common_voice_17_0",
 		},
+		{
+			description: "Multilingual and diverse audio dataset with 101k hours of audio.",
+			id: "amphion/Emilia-Dataset",
+		},
 		{
 			description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
 			id: "parler-tts/mls_eng",
 		},
 		{
-			description: "A multi-lingual audio dataset with 370K hours of audio.",
+			description: "A multilingual audio dataset with 370K hours of audio.",
 			id: "espnet/yodas",
 		},
 	],
@@ -54,6 +58,10 @@ const taskData: TaskDataCustom = {
 			description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
 			id: "facebook/seamless-m4t-v2-large",
 		},
+		{
+			description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
+			id: "nvidia/canary-1b",
+		},
 		{
 			description: "Powerful speaker diarization model.",
 			id: "pyannote/speaker-diarization-3.1",
@@ -65,13 +73,17 @@ const taskData: TaskDataCustom = {
 			id: "hf-audio/whisper-large-v3",
 		},
 		{
-			description: "Fastest speech recognition application.",
-			id: "sanchit-gandhi/whisper-jax",
+			description: "Latest ASR model from Useful Sensors.",
+			id: "mrfakename/Moonshinex",
 		},
 		{
 			description: "A high quality speech and text translation model by Meta.",
 			id: "facebook/seamless_m4t",
 		},
+		{
+			description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
+			id: "nvidia/canary-1b",
+		},
 	],
 	summary:
 		"Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
