
Commit bac80ab

Merge branch 'main' into add-workflow-update-specs
2 parents 330d297 + 72b0d9b

File tree

12 files changed: +258 -25 lines

README.md

Lines changed: 1 addition & 1 deletion

@@ -93,7 +93,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 ```html
 <script type="module">
   import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm';
-  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.19.0/+esm";
+  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.20.0/+esm";
 </script>
 ```

packages/hub/package.json

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 {
 	"name": "@huggingface/hub",
 	"packageManager": "[email protected]",
-	"version": "0.19.0",
+	"version": "0.20.0",
 	"description": "Utilities to interact with the Hugging Face hub",
 	"repository": "https://github.com/huggingface/huggingface.js.git",
 	"publishConfig": {

packages/hub/src/lib/index.ts

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@ export * from "./model-info";
 export * from "./oauth-handle-redirect";
 export * from "./oauth-login-url";
 export * from "./parse-safetensors-metadata";
+export * from "./paths-info";
 export * from "./space-info";
 export * from "./upload-file";
 export * from "./upload-files";

packages/hub/src/lib/paths-info.spec.ts

Lines changed: 75 additions & 0 deletions

@@ -0,0 +1,75 @@
+import { expect, it, describe } from "vitest";
+import type { CommitInfo, PathInfo, SecurityFileStatus } from "./paths-info";
+import { pathsInfo } from "./paths-info";
+
+describe("pathsInfo", () => {
+	it("should fetch LFS path info", async () => {
+		const result: PathInfo[] = await pathsInfo({
+			repo: {
+				name: "bert-base-uncased",
+				type: "model",
+			},
+			paths: ["tf_model.h5"],
+			revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+		});
+
+		expect(result).toHaveLength(1);
+
+		const modelPathInfo = result[0];
+		expect(modelPathInfo.path).toBe("tf_model.h5");
+		expect(modelPathInfo.type).toBe("file");
+		// LFS pointer, therefore lfs should be defined
+		expect(modelPathInfo?.lfs).toBeDefined();
+		expect(modelPathInfo?.lfs?.oid).toBe("a7a17d6d844b5de815ccab5f42cad6d24496db3850a2a43d8258221018ce87d2");
+		expect(modelPathInfo?.lfs?.size).toBe(536063208);
+		expect(modelPathInfo?.lfs?.pointerSize).toBe(134);
+
+		// should not include expand info
+		expect(modelPathInfo.lastCommit).toBeUndefined();
+		expect(modelPathInfo.securityFileStatus).toBeUndefined();
+	});
+
+	it("expand params should fetch lastCommit and securityFileStatus", async () => {
+		const result: (PathInfo & {
+			lastCommit: CommitInfo,
+			securityFileStatus: SecurityFileStatus,
+		})[] = await pathsInfo({
+			repo: {
+				name: "bert-base-uncased",
+				type: "model",
+			},
+			paths: ["tf_model.h5"],
+			revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+			expand: true, // include lastCommit and securityFileStatus in the response
+		});
+
+		expect(result).toHaveLength(1);
+
+		const modelPathInfo = result[0];
+
+		// should include expand info
+		expect(modelPathInfo.lastCommit).toBeDefined();
+		expect(modelPathInfo.securityFileStatus).toBeDefined();
+
+		expect(modelPathInfo.lastCommit.id).toBe("dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7");
+		expect(modelPathInfo.lastCommit.title).toBe("Update tf_model.h5");
+		expect(modelPathInfo.lastCommit.date.getTime()).toBe(1569268124000); // 2019-09-23T19:48:44.000Z
+	});
+
+	it("non-LFS pointer should have lfs undefined", async () => {
+		const result: PathInfo[] = await pathsInfo({
+			repo: {
+				name: "bert-base-uncased",
+				type: "model",
+			},
+			paths: ["config.json"],
+			revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+		});
+
+		expect(result).toHaveLength(1);
+
+		const modelPathInfo = result[0];
+		expect(modelPathInfo.path).toBe("config.json");
+		expect(modelPathInfo.lfs).toBeUndefined();
+	});
+});

packages/hub/src/lib/paths-info.ts

Lines changed: 120 additions & 0 deletions

@@ -0,0 +1,120 @@
+import type { CredentialsParams, RepoDesignation } from "../types/public";
+import { checkCredentials } from "../utils/checkCredentials";
+import { toRepoId } from "../utils/toRepoId";
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+
+export interface LfsPathInfo {
+	"oid": string,
+	"size": number,
+	"pointerSize": number
+}
+
+export interface CommitInfo {
+	"id": string,
+	"title": string,
+	"date": Date,
+}
+
+export interface SecurityFileStatus {
+	"status": string,
+}
+
+export interface PathInfo {
+	path: string,
+	type: string,
+	oid: string,
+	size: number,
+	/**
+	 * Only defined when path is LFS pointer
+	 */
+	lfs?: LfsPathInfo,
+	lastCommit?: CommitInfo,
+	securityFileStatus?: SecurityFileStatus
+}
+
+// Define the overloaded signatures
+export function pathsInfo(
+	params: {
+		repo: RepoDesignation;
+		paths: string[];
+		expand: true; // when expand is true, lastCommit and securityFileStatus are included
+		revision?: string;
+		hubUrl?: string;
+		/**
+		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+		 */
+		fetch?: typeof fetch;
+	} & Partial<CredentialsParams>
+): Promise<(PathInfo & { lastCommit: CommitInfo, securityFileStatus: SecurityFileStatus })[]>;
+export function pathsInfo(
+	params: {
+		repo: RepoDesignation;
+		paths: string[];
+		expand?: boolean;
+		revision?: string;
+		hubUrl?: string;
+		/**
+		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+		 */
+		fetch?: typeof fetch;
+	} & Partial<CredentialsParams>
+): Promise<PathInfo[]>;
+
+export async function pathsInfo(
+	params: {
+		repo: RepoDesignation;
+		paths: string[];
+		expand?: boolean;
+		revision?: string;
+		hubUrl?: string;
+		/**
+		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+		 */
+		fetch?: typeof fetch;
+	} & Partial<CredentialsParams>
+): Promise<PathInfo[]> {
+	const accessToken = checkCredentials(params);
+	const repoId = toRepoId(params.repo);
+
+	const hubUrl = params.hubUrl ?? HUB_URL;
+
+	const url = `${hubUrl}/api/${repoId.type}s/${repoId.name}/paths-info/${encodeURIComponent(params.revision ?? "main")}`;
+
+	const resp = await (params.fetch ?? fetch)(url, {
+		method: "POST",
+		headers: {
+			...(params.credentials && {
+				Authorization: `Bearer ${accessToken}`,
+			}),
+			'Accept': 'application/json',
+			'Content-Type': 'application/json'
+		},
+		body: JSON.stringify({
+			paths: params.paths,
+			expand: params.expand,
+		}),
+	});
+
+	if (!resp.ok) {
+		throw await createApiError(resp);
+	}
+
+	const json: unknown = await resp.json();
+	if (!Array.isArray(json)) throw new Error("malformed response: expected array");
+
+	return json.map((item: PathInfo) => ({
+		path: item.path,
+		lfs: item.lfs,
+		type: item.type,
+		oid: item.oid,
+		size: item.size,
+		// expand fields
+		securityFileStatus: item.securityFileStatus,
+		lastCommit: item.lastCommit
+			? {
+					date: new Date(item.lastCommit.date),
+					title: item.lastCommit.title,
+					id: item.lastCommit.id,
+			  }
+			: undefined,
+	}));
+}
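
For context, here is a minimal sketch of how the new `pathsInfo` export could be called from `@huggingface/hub`; the repo, paths, and revision values below are illustrative (borrowed from the spec above) and the snippet itself is not part of this commit:

```ts
import { pathsInfo } from "@huggingface/hub";

// Illustrative values only, taken from the tests above.
const entries = await pathsInfo({
	repo: { type: "model", name: "bert-base-uncased" },
	paths: ["config.json", "tf_model.h5"],
	revision: "main",
	expand: true, // also return lastCommit and securityFileStatus
});

for (const entry of entries) {
	// entry.lfs is only defined for LFS-tracked files (e.g. tf_model.h5).
	console.log(entry.path, entry.size, entry.lfs?.oid, entry.lastCommit.title);
}
```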

packages/tasks/.prettierignore

Lines changed: 2 additions & 1 deletion

@@ -1,4 +1,5 @@
 pnpm-lock.yaml
 # In order to avoid code samples to have tabs, they don't display well on npm
 README.md
-dist
+dist
+.tshy

packages/tasks/package.json

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 {
 	"name": "@huggingface/tasks",
 	"packageManager": "[email protected]",
-	"version": "0.13.1",
+	"version": "0.13.2",
 	"description": "List of ML tasks for huggingface.co/tasks",
 	"repository": "https://github.com/huggingface/huggingface.js.git",
 	"publishConfig": {

packages/tasks/src/snippets/js.spec.ts

Lines changed: 8 additions & 8 deletions

@@ -12,9 +12,9 @@ describe("inference API snippets", () => {
 	};
 	const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];

-	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference";

-const client = new HfInference("api_token")
+const client = new HfInference("api_token");

 let out = "";
@@ -47,9 +47,9 @@ for await (const chunk of stream) {
 	};
 	const snippet = getJsInferenceSnippet(model, "api_token", { streaming: false }) as InferenceSnippet[];

-	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference";

-const client = new HfInference("api_token")
+const client = new HfInference("api_token");

 const chatCompletion = await client.chatCompletion({
 	model: "meta-llama/Llama-3.1-8B-Instruct",
@@ -74,9 +74,9 @@ console.log(chatCompletion.choices[0].message);`);
 	};
 	const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];

-	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference";

-const client = new HfInference("api_token")
+const client = new HfInference("api_token");

 let out = "";
@@ -120,9 +120,9 @@ for await (const chunk of stream) {
 	};
 	const snippet = getJsInferenceSnippet(model, "api_token") as InferenceSnippet[];

-	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference"
+	expect(snippet[0].content).toEqual(`import { HfInference } from "@huggingface/inference";

-const client = new HfInference("api_token")
+const client = new HfInference("api_token");

 let out = "";

packages/tasks/src/snippets/js.ts

Lines changed: 8 additions & 8 deletions

@@ -58,9 +58,9 @@ export const snippetTextGeneration = (
 	return [
 		{
 			client: "huggingface.js",
-			content: `import { HfInference } from "@huggingface/inference"
+			content: `import { HfInference } from "@huggingface/inference";

-const client = new HfInference("${accessToken || `{API_TOKEN}`}")
+const client = new HfInference("${accessToken || `{API_TOKEN}`}");

 let out = "";
@@ -80,12 +80,12 @@ for await (const chunk of stream) {
 		},
 		{
 			client: "openai",
-			content: `import { OpenAI } from "openai"
+			content: `import { OpenAI } from "openai";

 const client = new OpenAI({
 	baseURL: "https://api-inference.huggingface.co/v1/",
 	apiKey: "${accessToken || `{API_TOKEN}`}"
-})
+});

 let out = "";
@@ -109,9 +109,9 @@ for await (const chunk of stream) {
 	return [
 		{
 			client: "huggingface.js",
-			content: `import { HfInference } from "@huggingface/inference"
+			content: `import { HfInference } from "@huggingface/inference";

-const client = new HfInference("${accessToken || `{API_TOKEN}`}")
+const client = new HfInference("${accessToken || `{API_TOKEN}`}");

 const chatCompletion = await client.chatCompletion({
 	model: "${model.id}",
@@ -123,12 +123,12 @@ console.log(chatCompletion.choices[0].message);`,
 		},
 		{
 			client: "openai",
-			content: `import { OpenAI } from "openai"
+			content: `import { OpenAI } from "openai";

 const client = new OpenAI({
 	baseURL: "https://api-inference.huggingface.co/v1/",
 	apiKey: "${accessToken || `{API_TOKEN}`}"
-})
+});

 const chatCompletion = await client.chat.completions.create({
 	model: "${model.id}",
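
For context, this is the kind of non-streaming chat-completion snippet the huggingface.js template now emits, with statements terminated by semicolons; the `messages` and `max_tokens` values below are illustrative, since that part of the template is elided in this diff:

```ts
import { HfInference } from "@huggingface/inference";

const client = new HfInference("api_token");

// Illustrative request body; the real template interpolates the model id and inputs.
const chatCompletion = await client.chatCompletion({
	model: "meta-llama/Llama-3.1-8B-Instruct",
	messages: [{ role: "user", content: "What is the capital of France?" }],
	max_tokens: 500,
});

console.log(chatCompletion.choices[0].message);
```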

packages/tasks/src/tasks/automatic-speech-recognition/data.ts

Lines changed: 15 additions & 3 deletions

@@ -6,12 +6,16 @@ const taskData: TaskDataCustom = {
 			description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
 			id: "mozilla-foundation/common_voice_17_0",
 		},
+		{
+			description: "Multilingual and diverse audio dataset with 101k hours of audio.",
+			id: "amphion/Emilia-Dataset",
+		},
 		{
 			description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
 			id: "parler-tts/mls_eng",
 		},
 		{
-			description: "A multi-lingual audio dataset with 370K hours of audio.",
+			description: "A multilingual audio dataset with 370K hours of audio.",
 			id: "espnet/yodas",
 		},
 	],
@@ -54,6 +58,10 @@ const taskData: TaskDataCustom = {
 			description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
 			id: "facebook/seamless-m4t-v2-large",
 		},
+		{
+			description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
+			id: "nvidia/canary-1b",
+		},
 		{
 			description: "Powerful speaker diarization model.",
 			id: "pyannote/speaker-diarization-3.1",
@@ -65,13 +73,17 @@ const taskData: TaskDataCustom = {
 			id: "hf-audio/whisper-large-v3",
 		},
 		{
-			description: "Fastest speech recognition application.",
-			id: "sanchit-gandhi/whisper-jax",
+			description: "Latest ASR model from Useful Sensors.",
+			id: "mrfakename/Moonshinex",
 		},
 		{
 			description: "A high quality speech and text translation model by Meta.",
 			id: "facebook/seamless_m4t",
 		},
+		{
+			description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
+			id: "nvidia/canary-1b",
+		},
 	],
 	summary:
 		"Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
