
Commit 876caee

Merge branch 'main' into update_tasks
2 parents b64f1dd + 4a0c8ff

12 files changed: +173 −44 lines changed

.github/pull_request_template/new_library.md

Lines changed: 0 additions & 34 deletions
This file was deleted.

.github/workflows/lint.yml

Lines changed: 6 additions & 3 deletions
@@ -18,12 +18,15 @@ jobs:
       - name: "Extracting the merge base into 'SINCE'"
         id: since
         run: |
-          if [ -z "${{ github.event.pull_request.head.ref }}" ]
+          if [ -z $PR_REF ]
           then
-            echo "SINCE=${{ github.sha }}^1" >> $GITHUB_OUTPUT
+            echo "SINCE=$SHA^1" >> $GITHUB_OUTPUT
           else
-            echo "SINCE=$(git merge-base origin/${{ github.event.pull_request.base.ref }} ${{ github.sha }})" >> $GITHUB_OUTPUT
+            echo "SINCE=$(git merge-base origin/$PR_REF $SHA)" >> $GITHUB_OUTPUT
           fi
+        env:
+          PR_REF: ${{ github.event.pull_request.head.ref }}
+          SHA: ${{ github.sha }}
 
       - run: corepack enable

README.md

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 ```html
 <script type="module">
   import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@…/+esm';
-  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.18.1/+esm";
+  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.18.2/+esm";
 </script>
 ```

packages/gguf/package.json

Lines changed: 2 additions & 1 deletion
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/gguf",
   "packageManager": "pnpm@…",
-  "version": "0.1.10",
+  "version": "0.1.12",
   "description": "a GGUF parser that works on remotely hosted files",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
@@ -27,6 +27,7 @@
   },
   "source": "index.ts",
   "scripts": {
+    "prepare": "pnpm run build",
     "lint": "eslint --quiet --fix --ext .cjs,.ts .",
     "lint:check": "eslint --ext .cjs,.ts .",
     "format": "prettier --write .",

packages/gguf/src/gguf.spec.ts

Lines changed: 18 additions & 1 deletion
@@ -1,6 +1,13 @@
 import { beforeAll, describe, expect, it } from "vitest";
 import type { GGUFParseOutput } from "./gguf";
-import { GGMLFileQuantizationType, GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
+import {
+  GGMLFileQuantizationType,
+  GGMLQuantizationType,
+  gguf,
+  ggufAllShards,
+  parseGgufShardFilename,
+  parseGGUFQuantLabel,
+} from "./gguf";
 import fs from "node:fs";
 
 const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
@@ -266,4 +273,14 @@ describe("gguf", () => {
     const { parameterCount } = await ggufAllShards(URL_SHARDED_GROK);
     expect(parameterCount).toEqual(316_490_127_360); // 316B
   });
+
+  it("parse quant label", async () => {
+    expect(parseGGUFQuantLabel("Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")).toEqual("Q4_K_M");
+    expect(parseGGUFQuantLabel("subdir/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf")).toEqual("Q4_K_M");
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q2_K.gguf")).toEqual("Q2_K");
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1.gguf")).toEqual(undefined);
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1-F32-Q2_K.gguf")).toEqual("Q2_K"); // gguf name with two quant labels [F32, Q2_K]
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1-IQ3_XS.gguf")).toEqual(undefined); // TODO: investigate IQ3_XS
+    expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q4_0_4_4.gguf")).toEqual("Q4_0"); // TODO: investigate Q4_0_4_4
+  });
 });

packages/gguf/src/gguf.ts

Lines changed: 10 additions & 1 deletion
@@ -1,5 +1,5 @@
 import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
-import { GGUFValueType } from "./types";
+import { GGMLQuantizationType, GGUFValueType } from "./types";
 import { isBackend } from "./utils/isBackend";
 import { promisesQueue } from "./utils/promisesQueue";
 
@@ -29,6 +29,15 @@ export function parseGgufShardFilename(filename: string): GgufShardFileInfo | null {
   return null;
 }
 
+const ggufQuants = Object.values(GGMLQuantizationType).filter((v): v is string => typeof v === "string");
+export const GGUF_QUANT_RE = new RegExp(`(?<quant>${ggufQuants.join("|")})` + "(_(?<sizeVariation>[A-Z]+))?");
+export const GGUF_QUANT_RE_GLOBAL = new RegExp(GGUF_QUANT_RE, "g");
+
+export function parseGGUFQuantLabel(fname: string): string | undefined {
+  const quantLabel = fname.toUpperCase().match(GGUF_QUANT_RE_GLOBAL)?.at(-1); // if there are multiple quant substrings in a name, we prefer the last one
+  return quantLabel;
+}
+
 const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;
 
 /**
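For reference, a minimal usage sketch of the new helper. The expected outputs mirror the gguf.spec.ts assertions above, and the package-root import is the same one local-apps.ts uses below:

import { parseGGUFQuantLabel } from "@huggingface/gguf";

// GGUF_QUANT_RE is built from the string values of GGMLQuantizationType,
// so any known quant name can match; the filename is upper-cased first.
parseGGUFQuantLabel("Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"); // "Q4_K_M"
parseGGUFQuantLabel("Codestral-22B-v0.1.gguf"); // undefined (no quant label)
// When a name contains several candidate labels, the global regex keeps the last match:
parseGGUFQuantLabel("Codestral-22B-v0.1-F32-Q2_K.gguf"); // "Q2_K"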

packages/hub/package.json

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/hub",
   "packageManager": "pnpm@…",
-  "version": "0.18.1",
+  "version": "0.18.2",
   "description": "Utilities to interact with the Hugging Face hub",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {

packages/tasks/package.json

Lines changed: 4 additions & 1 deletion
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/tasks",
   "packageManager": "pnpm@…",
-  "version": "0.12.19",
+  "version": "0.12.22",
   "description": "List of ML tasks for huggingface.co/tasks",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
@@ -51,5 +51,8 @@
     "@types/node": "^20.11.5",
     "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz",
     "type-fest": "^3.13.1"
+  },
+  "dependencies": {
+    "@huggingface/gguf": "workspace:^"
   }
 }

packages/tasks/pnpm-lock.yaml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default.

packages/tasks/src/local-apps.ts

Lines changed: 85 additions & 1 deletion
@@ -1,5 +1,6 @@
 import type { ModelData } from "./model-data";
 import type { PipelineType } from "./pipelines";
+import { parseGGUFQuantLabel } from "@huggingface/gguf";
 
 export interface LocalAppSnippet {
   /**
@@ -53,6 +54,7 @@ export type LocalApp = {
   /**
    * And if not (mostly llama.cpp), snippet to copy/paste in your terminal
    * Support the placeholder {{GGUF_FILE}} that will be replaced by the gguf file path or the list of available files.
+   * Support the placeholder {{OLLAMA_TAG}} that will be replaced by the list of available quant tags or will be removed if there are no multiple quant files in a same repo.
    */
   snippet: (model: ModelData, filepath?: string) => string | string[] | LocalAppSnippet | LocalAppSnippet[];
 }
@@ -77,11 +79,18 @@ function isMarlinModel(model: ModelData): boolean {
 function isTransformersModel(model: ModelData): boolean {
   return model.tags.includes("transformers");
 }
+function isTgiModel(model: ModelData): boolean {
+  return model.tags.includes("text-generation-inference");
+}
 
 function isLlamaCppGgufModel(model: ModelData) {
   return !!model.gguf?.context_length;
 }
 
+function isMlxModel(model: ModelData) {
+  return model.tags.includes("mlx");
+}
+
 const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
   const command = (binary: string) =>
     [
@@ -119,6 +128,32 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
   ];
 };
 
+const snippetNodeLlamaCppCli = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
+  return [
+    {
+      title: "Chat with the model",
+      content: [
+        `npx -y node-llama-cpp chat \\`,
+        `  --model "hf:${model.id}/${filepath ?? "{{GGUF_FILE}}"}" \\`,
+        `  --prompt 'Hi there!'`,
+      ].join("\n"),
+    },
+    {
+      title: "Estimate the model compatibility with your hardware",
+      content: `npx -y node-llama-cpp inspect estimate "hf:${model.id}/${filepath ?? "{{GGUF_FILE}}"}"`,
+    },
+  ];
+};
+
+const snippetOllama = (model: ModelData, filepath?: string): string => {
+  if (filepath) {
+    const quantLabel = parseGGUFQuantLabel(filepath);
+    const ollamatag = quantLabel ? `:${quantLabel}` : "";
+    return `ollama run hf.co/${model.id}${ollamatag}`;
+  }
+  return `ollama run hf.co/${model.id}{{OLLAMA_TAG}}`;
+};
+
 const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
   const command = (binary: string) =>
     ["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
@@ -180,6 +215,34 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
     },
   ];
 };
+const snippetTgi = (model: ModelData): LocalAppSnippet[] => {
+  const runCommand = [
+    "# Call the server using curl:",
+    `curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
+    `  -H "Content-Type: application/json" \\`,
+    `  --data '{`,
+    `    "model": "${model.id}",`,
+    `    "messages": [`,
+    `      {"role": "user", "content": "What is the capital of France?"}`,
+    `    ]`,
+    `  }'`,
+  ];
+  return [
+    {
+      title: "Use Docker images",
+      setup: [
+        "# Deploy with docker on Linux:",
+        `docker run --gpus all \\`,
+        `  -v ~/.cache/huggingface:/root/.cache/huggingface \\`,
+        `  -e HF_TOKEN="<secret>" \\`,
+        `  -p 8000:80 \\`,
+        `  ghcr.io/huggingface/text-generation-inference:latest \\`,
+        `  --model-id ${model.id}`,
+      ].join("\n"),
      content: [runCommand.join("\n")],
+    },
+  ];
+};
 
 /**
  * Add your new local app here.
@@ -200,6 +263,13 @@ export const LOCAL_APPS = {
     displayOnModelPage: isLlamaCppGgufModel,
     snippet: snippetLlamacpp,
   },
+  "node-llama-cpp": {
+    prettyLabel: "node-llama-cpp",
+    docsUrl: "https://node-llama-cpp.withcat.ai",
+    mainTask: "text-generation",
+    displayOnModelPage: isLlamaCppGgufModel,
+    snippet: snippetNodeLlamaCppCli,
+  },
   vllm: {
     prettyLabel: "vLLM",
     docsUrl: "https://docs.vllm.ai",
@@ -214,11 +284,18 @@ export const LOCAL_APPS = {
       (model.pipeline_tag === "text-generation" || model.pipeline_tag === "image-text-to-text"),
     snippet: snippetVllm,
   },
+  tgi: {
+    prettyLabel: "TGI",
+    docsUrl: "https://huggingface.co/docs/text-generation-inference/",
+    mainTask: "text-generation",
+    displayOnModelPage: isTgiModel,
+    snippet: snippetTgi,
+  },
   lmstudio: {
     prettyLabel: "LM Studio",
     docsUrl: "https://lmstudio.ai",
     mainTask: "text-generation",
-    displayOnModelPage: isLlamaCppGgufModel,
+    displayOnModelPage: (model) => isLlamaCppGgufModel(model) || isMlxModel(model),
     deeplink: (model, filepath) =>
       new URL(`lmstudio://open_from_hf?model=${model.id}${filepath ? `&file=${filepath}` : ""}`),
   },
@@ -323,6 +400,13 @@ export const LOCAL_APPS = {
     displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
     deeplink: (model) => new URL(`https://models.invoke.ai/huggingface/${model.id}`),
   },
+  ollama: {
+    prettyLabel: "Ollama",
+    docsUrl: "https://ollama.com",
+    mainTask: "text-generation",
+    displayOnModelPage: isLlamaCppGgufModel,
+    snippet: snippetOllama,
+  },
 } satisfies Record<string, LocalApp>;
 
 export type LocalAppKey = keyof typeof LOCAL_APPS;
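To see how the new entries are consumed, a minimal sketch follows. The model object is trimmed to the fields the ollama entry actually reads (a real ModelData carries many more), the repo and file names are illustrative only, and it assumes ModelData is re-exported from the package root:

import { LOCAL_APPS, type ModelData } from "@huggingface/tasks";

// Trimmed stand-in for a GGUF repo's metadata (illustrative values only).
const model = {
  id: "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
  tags: [],
  gguf: { context_length: 131072 },
} as unknown as ModelData;

// displayOnModelPage gates the entry on GGUF metadata (isLlamaCppGgufModel).
if (LOCAL_APPS.ollama.displayOnModelPage(model)) {
  // With a concrete file, the parsed quant label becomes the Ollama tag:
  LOCAL_APPS.ollama.snippet(model, "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf");
  // -> "ollama run hf.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M"

  // Without one, the {{OLLAMA_TAG}} placeholder is left for the caller to fill:
  LOCAL_APPS.ollama.snippet(model);
  // -> "ollama run hf.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF{{OLLAMA_TAG}}"
}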
