
Commit 876caee

Merge branch 'main' into update_tasks
2 parents b64f1dd + 4a0c8ff

12 files changed: +173 −44 lines changed

.github/pull_request_template/new_library.md

Lines changed: 0 additions & 34 deletions
This file was deleted.

.github/workflows/lint.yml

Lines changed: 6 additions & 3 deletions
@@ -18,12 +18,15 @@ jobs:
       - name: "Extracting the merge base into 'SINCE'"
         id: since
         run: |
-          if [ -z "${{ github.event.pull_request.head.ref }}" ]
+          if [ -z $PR_REF ]
           then
-            echo "SINCE=${{ github.sha }}^1" >> $GITHUB_OUTPUT
+            echo "SINCE=$SHA^1" >> $GITHUB_OUTPUT
           else
-            echo "SINCE=$(git merge-base origin/${{ github.event.pull_request.base.ref }} ${{ github.sha }})" >> $GITHUB_OUTPUT
+            echo "SINCE=$(git merge-base origin/$PR_REF $SHA)" >> $GITHUB_OUTPUT
           fi
+        env:
+          PR_REF: ${{ github.event.pull_request.head.ref }}
+          SHA: ${{ github.sha }}
 
       - run: corepack enable

README.md

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 ```html
 <script type="module">
   import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@…/+esm';
-  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.18.1/+esm";
+  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.18.2/+esm";
 </script>
 ```

packages/gguf/package.json

Lines changed: 2 additions & 1 deletion
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/gguf",
   "packageManager": "pnpm@…",
-  "version": "0.1.10",
+  "version": "0.1.12",
   "description": "a GGUF parser that works on remotely hosted files",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
@@ -27,6 +27,7 @@
   },
   "source": "index.ts",
   "scripts": {
+    "prepare": "pnpm run build",
     "lint": "eslint --quiet --fix --ext .cjs,.ts .",
     "lint:check": "eslint --ext .cjs,.ts .",
     "format": "prettier --write .",

packages/gguf/src/gguf.spec.ts

Lines changed: 18 additions & 1 deletion
@@ -1,6 +1,13 @@
 import { beforeAll, describe, expect, it } from "vitest";
 import type { GGUFParseOutput } from "./gguf";
-import { GGMLFileQuantizationType, GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
+import {
+  GGMLFileQuantizationType,
+  GGMLQuantizationType,
+  gguf,
+  ggufAllShards,
+  parseGgufShardFilename,
+  parseGGUFQuantLabel,
+} from "./gguf";
 import fs from "node:fs";
 
 const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
@@ -266,4 +273,14 @@ describe("gguf", () => {
     const { parameterCount } = await ggufAllShards(URL_SHARDED_GROK);
     expect(parameterCount).toEqual(316_490_127_360); // 316B
   });
+
+  it("parse quant label", async () => {
+    expect(parseGGUFQuantLabel("Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")).toEqual("Q4_K_M");
+    expect(parseGGUFQuantLabel("subdir/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")).toEqual("Q4_K_M");
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q2_K.gguf")).toEqual("Q2_K");
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1.gguf")).toEqual(undefined);
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1-F32-Q2_K.gguf")).toEqual("Q2_K"); // gguf name with two quant labels [F32, Q2_K]
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1-IQ3_XS.gguf")).toEqual(undefined); // TODO: investigate IQ3_XS
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q4_0_4_4.gguf")).toEqual("Q4_0"); // TODO: investigate Q4_0_4_4
+  });
 });

packages/gguf/src/gguf.ts

Lines changed: 10 additions & 1 deletion
@@ -1,5 +1,5 @@
 import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
-import { GGUFValueType } from "./types";
+import { GGMLQuantizationType, GGUFValueType } from "./types";
 import { isBackend } from "./utils/isBackend";
 import { promisesQueue } from "./utils/promisesQueue";
 
@@ -29,6 +29,15 @@ export function parseGgufShardFilename(filename: string): GgufShardFileInfo | null {
   return null;
 }
 
+const ggufQuants = Object.values(GGMLQuantizationType).filter((v): v is string => typeof v === "string");
+export const GGUF_QUANT_RE = new RegExp(`(?<quant>${ggufQuants.join("|")})` + "(_(?<sizeVariation>[A-Z]+))?");
+export const GGUF_QUANT_RE_GLOBAL = new RegExp(GGUF_QUANT_RE, "g");
+
+export function parseGGUFQuantLabel(fname: string): string | undefined {
+  const quantLabel = fname.toUpperCase().match(GGUF_QUANT_RE_GLOBAL)?.at(-1); // if there are multiple quant substrings in a name, we prefer the last one
+  return quantLabel;
+}
+
 const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;
 
 /**
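For reference, a minimal usage sketch of the new helper. The expected outputs mirror the gguf.spec.ts assertions above, and the package-root import is the same one local-apps.ts uses below:

import { parseGGUFQuantLabel } from "@huggingface/gguf";

// GGUF_QUANT_RE is built from the string values of GGMLQuantizationType,
// so any known quant name can match; the filename is upper-cased first.
parseGGUFQuantLabel("Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"); // "Q4_K_M"
parseGGUFQuantLabel("Codestral-22B-v0.1.gguf"); // undefined (no quant label)
// When a name contains several candidate labels, the global regex keeps the last match:
parseGGUFQuantLabel("Codestral-22B-v0.1-F32-Q2_K.gguf"); // "Q2_K"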

packages/hub/package.json

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/hub",
   "packageManager": "pnpm@…",
-  "version": "0.18.1",
+  "version": "0.18.2",
   "description": "Utilities to interact with the Hugging Face hub",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {

packages/tasks/package.json

Lines changed: 4 additions & 1 deletion
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/tasks",
   "packageManager": "pnpm@…",
-  "version": "0.12.19",
+  "version": "0.12.22",
   "description": "List of ML tasks for huggingface.co/tasks",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
@@ -51,5 +51,8 @@
     "@types/node": "^20.11.5",
     "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz",
     "type-fest": "^3.13.1"
+  },
+  "dependencies": {
+    "@huggingface/gguf": "workspace:^"
   }
 }

packages/tasks/pnpm-lock.yaml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default.

packages/tasks/src/local-apps.ts

Lines changed: 85 additions & 1 deletion
@@ -1,5 +1,6 @@
 import type { ModelData } from "./model-data";
 import type { PipelineType } from "./pipelines";
+import { parseGGUFQuantLabel } from "@huggingface/gguf";
 
 export interface LocalAppSnippet {
   /**
@@ -53,6 +54,7 @@ export type LocalApp = {
   /**
    * And if not (mostly llama.cpp), snippet to copy/paste in your terminal
    * Support the placeholder {{GGUF_FILE}} that will be replaced by the gguf file path or the list of available files.
+   * Support the placeholder {{OLLAMA_TAG}} that will be replaced by the list of available quant tags or will be removed if there are no multiple quant files in a same repo.
    */
   snippet: (model: ModelData, filepath?: string) => string | string[] | LocalAppSnippet | LocalAppSnippet[];
 }
@@ -77,11 +79,18 @@ function isMarlinModel(model: ModelData): boolean {
 function isTransformersModel(model: ModelData): boolean {
   return model.tags.includes("transformers");
 }
+function isTgiModel(model: ModelData): boolean {
+  return model.tags.includes("text-generation-inference");
+}
 
 function isLlamaCppGgufModel(model: ModelData) {
   return !!model.gguf?.context_length;
 }
 
+function isMlxModel(model: ModelData) {
+  return model.tags.includes("mlx");
+}
+
 const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
   const command = (binary: string) =>
     [
@@ -119,6 +128,32 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
   ];
 };
 
+const snippetNodeLlamaCppCli = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
+  return [
+    {
+      title: "Chat with the model",
+      content: [
+        `npx -y node-llama-cpp chat \\`,
+        `  --model "hf:${model.id}/${filepath ?? "{{GGUF_FILE}}"}" \\`,
+        `  --prompt 'Hi there!'`,
+      ].join("\n"),
+    },
+    {
+      title: "Estimate the model compatibility with your hardware",
+      content: `npx -y node-llama-cpp inspect estimate "hf:${model.id}/${filepath ?? "{{GGUF_FILE}}"}"`,
+    },
+  ];
+};
+
+const snippetOllama = (model: ModelData, filepath?: string): string => {
+  if (filepath) {
+    const quantLabel = parseGGUFQuantLabel(filepath);
+    const ollamatag = quantLabel ? `:${quantLabel}` : "";
+    return `ollama run hf.co/${model.id}${ollamatag}`;
+  }
+  return `ollama run hf.co/${model.id}{{OLLAMA_TAG}}`;
+};
+
 const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
   const command = (binary: string) =>
     ["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
@@ -180,6 +215,34 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
     },
   ];
 };
+const snippetTgi = (model: ModelData): LocalAppSnippet[] => {
+  const runCommand = [
+    "# Call the server using curl:",
+    `curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
+    `  -H "Content-Type: application/json" \\`,
+    `  --data '{`,
+    `    "model": "${model.id}",`,
+    `    "messages": [`,
+    `      {"role": "user", "content": "What is the capital of France?"}`,
+    `    ]`,
+    `  }'`,
+  ];
+  return [
+    {
+      title: "Use Docker images",
+      setup: [
+        "# Deploy with docker on Linux:",
+        `docker run --gpus all \\`,
+        `  -v ~/.cache/huggingface:/root/.cache/huggingface \\`,
+        `  -e HF_TOKEN="<secret>" \\`,
+        `  -p 8000:80 \\`,
+        `  ghcr.io/huggingface/text-generation-inference:latest \\`,
+        `  --model-id ${model.id}`,
+      ].join("\n"),
      content: [runCommand.join("\n")],
+    },
+  ];
+};
 
 /**
  * Add your new local app here.
@@ -200,6 +263,13 @@ export const LOCAL_APPS = {
     displayOnModelPage: isLlamaCppGgufModel,
     snippet: snippetLlamacpp,
   },
+  "node-llama-cpp": {
+    prettyLabel: "node-llama-cpp",
+    docsUrl: "https://node-llama-cpp.withcat.ai",
+    mainTask: "text-generation",
+    displayOnModelPage: isLlamaCppGgufModel,
+    snippet: snippetNodeLlamaCppCli,
+  },
   vllm: {
     prettyLabel: "vLLM",
     docsUrl: "https://docs.vllm.ai",
@@ -214,11 +284,18 @@ export const LOCAL_APPS = {
       (model.pipeline_tag === "text-generation" || model.pipeline_tag === "image-text-to-text"),
     snippet: snippetVllm,
   },
+  tgi: {
+    prettyLabel: "TGI",
+    docsUrl: "https://huggingface.co/docs/text-generation-inference/",
+    mainTask: "text-generation",
+    displayOnModelPage: isTgiModel,
+    snippet: snippetTgi,
+  },
   lmstudio: {
     prettyLabel: "LM Studio",
     docsUrl: "https://lmstudio.ai",
     mainTask: "text-generation",
-    displayOnModelPage: isLlamaCppGgufModel,
+    displayOnModelPage: (model) => isLlamaCppGgufModel(model) || isMlxModel(model),
     deeplink: (model, filepath) =>
       new URL(`lmstudio://open_from_hf?model=${model.id}${filepath ? `&file=${filepath}` : ""}`),
   },
@@ -323,6 +400,13 @@ export const LOCAL_APPS = {
     displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
     deeplink: (model) => new URL(`https://models.invoke.ai/huggingface/${model.id}`),
   },
+  ollama: {
+    prettyLabel: "Ollama",
+    docsUrl: "https://ollama.com",
+    mainTask: "text-generation",
+    displayOnModelPage: isLlamaCppGgufModel,
+    snippet: snippetOllama,
+  },
 } satisfies Record<string, LocalApp>;
 
 export type LocalAppKey = keyof typeof LOCAL_APPS;
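To see how the new entries are consumed, a minimal sketch follows. The model object is trimmed to the fields the ollama entry actually reads (a real ModelData carries many more), the repo and file names are illustrative only, and it assumes ModelData is re-exported from the package root:

import { LOCAL_APPS, type ModelData } from "@huggingface/tasks";

// Trimmed stand-in for a GGUF repo's metadata (illustrative values only).
const model = {
  id: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
  tags: [],
  gguf: { context_length: 131072 },
} as unknown as ModelData;

// displayOnModelPage gates the entry on GGUF metadata (isLlamaCppGgufModel).
if (LOCAL_APPS.ollama.displayOnModelPage(model)) {
  // With a concrete file, the parsed quant label becomes the Ollama tag:
  LOCAL_APPS.ollama.snippet(model, "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf");
  // -> "ollama run hf.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M"

  // Without one, the {{OLLAMA_TAG}} placeholder is left for the caller to fill:
  LOCAL_APPS.ollama.snippet(model);
  // -> "ollama run hf.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF{{OLLAMA_TAG}}"
}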
