|
1 | | -import { type RepoFullName, type RepoType, listFiles } from '@huggingface/hub' |
2 | 1 | import type { DirSource, FileMetadata, FileSource, SourcePart } from './types.js' |
3 | 2 | import { getFileName } from './utils.js' |
4 | 3 |
|
/**
 * Kinds of Hugging Face repository distinguished by this module.
 */
type RepoType =
  | 'model'
  | 'dataset'
  | 'space'
| 5 | + |
5 | 6 | interface BaseUrl { |
6 | 7 | source: string |
7 | 8 | origin: string |
@@ -45,7 +46,7 @@ interface RefMetadata extends RefResponse { |
45 | 46 |
|
46 | 47 | const baseUrl = 'https://huggingface.co' |
47 | 48 |
|
/**
 * Build the repository path segment used in Hugging Face URLs.
 *
 * Datasets are prefixed with `datasets/` and spaces with `spaces/`; any other
 * repo type yields the bare repo name.
 *
 * @param url parsed Hugging Face URL; only `type` and `repo` are read
 * @returns the repo name, prefixed according to its type
 */
function getFullName(url: HFUrl): string {
  switch (url.type) {
    case 'dataset':
      return `datasets/${url.repo}`
    case 'space':
      return `spaces/${url.repo}`
    default:
      return url.repo
  }
}
51 | 52 | function getSourceParts(url: HFUrl): SourcePart[] { |
@@ -74,15 +75,12 @@ function getSourceParts(url: HFUrl): SourcePart[] { |
/**
 * Build the `tree` URL of a directory, without a trailing slash.
 *
 * @param url directory URL; `origin`, `branch` and `path` are read, plus
 *   whatever `getFullName` needs
 * @returns `<origin>/<full name>/tree/<branch><path>` with one trailing `/` removed, if present
 */
function getPrefix(url: DirectoryUrl): string {
  const { origin, branch, path } = url
  const treeUrl = `${origin}/${getFullName(url)}/tree/${branch}${path}`
  // Only a single trailing slash is stripped, like the original `/\/$/` replace.
  return treeUrl.endsWith('/') ? treeUrl.slice(0, -1) : treeUrl
}
77 | | -async function fetchFilesList(url: DirectoryUrl, options?: {requestInit?: RequestInit, accessToken?: string}): Promise<FileMetadata[]> { |
| 78 | +async function fetchFilesList(url: DirectoryUrl, options?: { requestInit?: RequestInit, accessToken?: string }): Promise<FileMetadata[]> { |
| 79 | + const repoFullName = getFullName(url) |
78 | 80 | const filesIterator = listFiles({ |
79 | | - repo: { |
80 | | - name: url.repo, |
81 | | - type: url.type, |
82 | | - }, |
| 81 | + repoFullName, |
83 | 82 | revision: url.branch, |
84 | 83 | path: 'path' in url ? url.path.replace(/^\//, '') : '', // remove leading slash if any |
85 | | - expand: true, |
86 | 84 | accessToken: options?.accessToken, |
87 | 85 | }) |
88 | 86 | const files: FileMetadata[] = [] |
@@ -256,7 +254,7 @@ export function parseHuggingFaceUrl(url: string): HFUrl { |
256 | 254 | * |
257 | 255 | * @returns the list of branches, tags, pull requests, and converts |
258 | 256 | */ |
259 | | -export async function fetchRefsList( |
| 257 | +async function fetchRefsList( |
260 | 258 | url: HFUrl, |
261 | 259 | options?: {requestInit?: RequestInit, accessToken?: string} |
262 | 260 | ): Promise<RefMetadata[]> { |
@@ -286,3 +284,77 @@ export async function fetchRefsList( |
286 | 284 | }) |
287 | 285 | }) |
288 | 286 | } |
| 287 | + |
| 288 | +/* |
| 289 | + * Copied and adapted from https://github.com/huggingface/huggingface.js/blob/main/packages/hub |
| 290 | + * MIT License, Copyright (c) 2023 Hugging Face |
| 291 | + */ |
| 292 | + |
/**
 * One entry of a Hub "tree" listing, as consumed by this module.
 */
interface ListFileEntry {
  /** Kind of entry. */
  type: 'file' | 'directory' | 'unknown'
  /** Size reported by the Hub (presumably in bytes - confirm against the API). */
  size: number
  /** Path of the entry (presumably relative to the repository root - confirm). */
  path: string
  /** Last commit information, when the response includes it. */
  lastCommit?: {
    date: string
    id: string
  }
}

/** Base URL of the Hugging Face Hub, used to build API requests. */
const HUB_URL = 'https://huggingface.co'

/** Repo-type prefixes that the Hub API expects right after `/api/`. */
const API_REPO_PREFIXES = ['models/', 'datasets/', 'spaces/'] as const

/**
 * Turn a repo full name into the path segment expected by the Hub API.
 *
 * Names already starting with `models/`, `datasets/` or `spaces/` are kept as is.
 * Any other name is treated as a model and prefixed with `models/`, because the
 * API (unlike the website) requires the repo type for models too.
 */
function toApiRepoPath(repoFullName: string): string {
  const hasTypePrefix = API_REPO_PREFIXES.some(prefix => repoFullName.startsWith(prefix))
  return hasTypePrefix ? repoFullName : `models/${repoFullName}`
}

/**
 * List the entries of a folder of a repository.
 *
 * Pages are fetched one after the other: after each response, the `Link`
 * header is inspected and its `rel="next"` URL, if any, is requested next.
 * The request only sets `expand=true`; it does not ask for a recursive listing.
 *
 * @param params.repoFullName repo name, e.g. `datasets/user/repo`, `spaces/user/repo`,
 *   or `user/repo` for a model
 * @param params.path folder to list, without leading slash (e.g. `data`). Leave it
 *   empty to list the root of the repo.
 * @param params.revision revision to list, `main` when omitted. It is inserted in the
 *   URL as is, so it must already be URL-safe.
 * @param params.fetch custom fetch function to use instead of the default one, for
 *   example to use a proxy or edit headers
 * @param params.accessToken token sent as `Authorization: Bearer <token>` when provided
 * @yields every entry of every page, in response order
 * @throws Error when a response is not OK, or when its body is not a JSON array
 */
async function* listFiles(
  params: {
    repoFullName: string
    path?: string
    revision?: string
    fetch?: typeof fetch
    accessToken?: string
  }
): AsyncGenerator<ListFileEntry> {
  const doFetch = params.fetch ?? fetch
  const headers: Record<string, string> = { accept: 'application/json' }
  if (params.accessToken) {
    headers.Authorization = `Bearer ${params.accessToken}`
  }

  const repoPath = toApiRepoPath(params.repoFullName)
  const revision = params.revision ?? 'main'
  const folder = params.path ? '/' + params.path : ''
  let url: string | undefined = `${HUB_URL}/api/${repoPath}/tree/${revision}${folder}?expand=true`

  while (url) {
    const res: Response = await doFetch(url, { headers })

    if (!res.ok) {
      throw new Error(`Failed to list files: ${res.status.toString()} ${res.statusText}`)
    }

    const items: unknown = await res.json()
    if (!Array.isArray(items)) {
      // Fail with a clear message instead of an opaque "not iterable" TypeError.
      throw new Error('Failed to list files: unexpected response body')
    }

    for (const item of items as ListFileEntry[]) {
      yield item
    }

    // The next page, if any, is advertised in the Link header.
    const linkHeader = res.headers.get('Link')
    url = linkHeader ? parseLinkHeader(linkHeader).next : undefined
  }
}

/**
 * Parse Link HTTP header, eg `<https://huggingface.co/api/datasets/bigscience/P3/tree/main?recursive=1&cursor=...>; rel="next"`
 *
 * Only absolute http(s) targets directly followed by a quoted `rel` parameter
 * are recognized. Whitespace around the `;` separator is optional.
 *
 * @param header raw value of the `Link` header
 * @returns a map from relation name (e.g. `next`) to target URL; empty when nothing matches
 */
export function parseLinkHeader(header: string): Record<string, string> {
  const regex = /<(https?:[/][/][^>]+)>\s*;\s*rel="([^"]+)"/g

  const pairs: [string, string][] = []
  for (const [, url, rel] of header.matchAll(regex)) {
    if (url !== undefined && rel !== undefined) {
      pairs.push([rel, url])
    }
  }
  return Object.fromEntries(pairs)
}
0 commit comments