Skip to content

Commit 4fb5188

Browse files
committed
support space URLs
1 parent d71562e · commit 4fb5188

File tree

2 files changed

+130
-176
lines changed

2 files changed

+130
-176
lines changed

src/lib/sources/huggingFaceSource.ts

Lines changed: 55 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1,37 +1,44 @@
1-
import { listFiles } from '@huggingface/hub'
1+
import { type RepoFullName, type RepoType, listFiles, parseRepoType } from '@huggingface/hub'
22
import type { DirSource, FileMetadata, FileSource, SourcePart } from './types.js'
33
import { getFileName } from './utils.js'
44

5-
export const baseUrl = 'https://huggingface.co/datasets'
5+
export const baseUrl = 'https://huggingface.co'
66

77
function getSourceParts(url: HFUrl): SourcePart[] {
8+
const fullName = getFullName(url)
89
const sourceParts: SourcePart[] = [{
9-
sourceId: `${baseUrl}/${url.repo}/tree/${url.branch}/`,
10-
text: `${baseUrl}/${url.repo}/${url.action}/${url.branch}/`,
10+
sourceId: `${baseUrl}/${fullName}/tree/${url.branch}/`,
11+
text: `${baseUrl}/${fullName}/${url.action}/${url.branch}/`,
1112
}]
1213

1314
const pathParts = url.path.split('/').filter(d => d.length > 0)
1415
const lastPart = pathParts.at(-1)
1516
if (lastPart) {
1617
for (const [i, part] of pathParts.slice(0, -1).entries()) {
1718
sourceParts.push({
18-
sourceId: `${baseUrl}/${url.repo}/tree/${url.branch}/${pathParts.slice(0, i + 1).join('/')}`,
19+
sourceId: `${baseUrl}/${fullName}/tree/${url.branch}/${pathParts.slice(0, i + 1).join('/')}`,
1920
text: part + '/',
2021
})
2122
}
2223
sourceParts.push({
23-
sourceId: `${baseUrl}/${url.repo}/${url.action}/${url.branch}${url.path}`,
24+
sourceId: `${baseUrl}/${fullName}/${url.action}/${url.branch}${url.path}`,
2425
text: lastPart,
2526
})
2627
}
2728
return sourceParts
2829
}
2930
function getPrefix(url: DirectoryUrl): string {
30-
return `${url.origin}/datasets/${url.repo}/tree/${url.branch}${url.path}`.replace(/\/$/, '')
31+
return `${url.origin}/${getFullName(url)}/tree/${url.branch}${url.path}`.replace(/\/$/, '')
32+
}
33+
function getFullName(url: HFUrl): RepoFullName {
34+
return url.type === 'dataset' ? `datasets/${url.repo}` : url.type === 'space' ? `spaces/${url.repo}` : url.repo
3135
}
3236
async function fetchFilesList(url: DirectoryUrl, options?: {requestInit?: RequestInit, accessToken?: string}): Promise<FileMetadata[]> {
3337
const filesIterator = listFiles({
34-
repo: `datasets/${url.repo}`,
38+
repo: {
39+
name: url.repo,
40+
type: url.type,
41+
},
3542
revision: url.branch,
3643
path: 'path' in url ? url.path.replace(/^\//, '') : '', // remove leading slash if any
3744
expand: true,
@@ -44,7 +51,7 @@ async function fetchFilesList(url: DirectoryUrl, options?: {requestInit?: Reques
4451
eTag: file.lastCommit?.id,
4552
size: file.size,
4653
lastModified: file.lastCommit?.date,
47-
sourceId: `${url.origin}/datasets/${url.repo}/${file.type === 'file' ? 'blob' : 'tree'}/${url.branch}/${file.path}`.replace(/\/$/, ''),
54+
sourceId: `${url.origin}/${getFullName(url)}/${file.type === 'file' ? 'blob' : 'tree'}/${url.branch}/${file.path}`.replace(/\/$/, ''),
4855
kind: file.type === 'file' ? 'file' : 'directory', // 'unknown' is considered as a directory
4956
})
5057
}
@@ -54,7 +61,7 @@ export function getHuggingFaceSource(sourceId: string, options?: {requestInit?:
5461
try {
5562
const url = parseHuggingFaceUrl(sourceId)
5663
async function fetchVersions() {
57-
const refsList = await fetchRefsList(url.repo, options)
64+
const refsList = await fetchRefsList(url, options)
5865
return {
5966
label: 'Branches',
6067
versions: refsList.map(({ refType, name, ref }) => {
@@ -65,7 +72,7 @@ export function getHuggingFaceSource(sourceId: string, options?: {requestInit?:
6572
// remove refs/heads/ from the ref name
6673
// e.g. refs/heads/main -> main
6774
const fixedRef = refType === 'branches' ? ref.replace(/refs\/heads\//, '') : ref
68-
const branchSourceId = `${url.origin}/datasets/${url.repo}/${url.kind === 'file' ? 'blob' : 'tree'}/${fixedRef}${url.path}`
75+
const branchSourceId = `${url.origin}/${getFullName(url)}/${url.kind === 'file' ? 'blob' : 'tree'}/${fixedRef}${url.path}`
6976
return {
7077
label,
7178
sourceId: branchSourceId,
@@ -98,25 +105,24 @@ export function getHuggingFaceSource(sourceId: string, options?: {requestInit?:
98105
}
99106
}
100107

101-
export interface DirectoryUrl {
102-
kind: 'directory';
103-
source: string;
104-
origin: string;
105-
repo: string;
106-
action: 'tree';
107-
branch: string;
108-
path: string;
108+
interface BaseUrl {
109+
source: string
110+
origin: string
111+
type: RepoType
112+
repo: string
113+
branch: string
114+
path: string
115+
}
116+
117+
export interface DirectoryUrl extends BaseUrl {
118+
kind: 'directory'
119+
action: 'tree'
109120
}
110121

111-
export interface FileUrl {
112-
kind: 'file';
113-
source: string;
114-
origin: string;
115-
repo: string;
116-
action: 'resolve' | 'blob';
117-
branch: string;
118-
path: string;
119-
resolveUrl: string;
122+
export interface FileUrl extends BaseUrl {
123+
kind: 'file'
124+
action: 'resolve' | 'blob'
125+
resolveUrl: string
120126
}
121127

122128
type HFUrl = DirectoryUrl | FileUrl;
@@ -135,69 +141,74 @@ export function parseHuggingFaceUrl(url: string): HFUrl {
135141
throw new Error('Not a Hugging Face URL')
136142
}
137143

138-
const repoGroups = /^\/datasets\/(?<namespace>[^/]+)\/(?<dataset>[^/]+)\/?$/.exec(
144+
const repoGroups = /^(?<type>\/datasets|\/spaces)\/(?<namespace>[^/]+)\/(?<repo>[^/]+)\/?$/.exec(
139145
urlObject.pathname
140146
)?.groups
141-
if (repoGroups?.namespace !== undefined && repoGroups.dataset !== undefined) {
147+
if (repoGroups?.type !== undefined && repoGroups.namespace !== undefined && repoGroups.repo !== undefined) {
142148
return {
143149
kind: 'directory',
144150
source: url,
145151
origin: urlObject.origin,
146-
repo: repoGroups.namespace + '/' + repoGroups.dataset,
152+
type: parseRepoType(repoGroups.type.slice(1)),
153+
repo: repoGroups.namespace + '/' + repoGroups.repo,
147154
action: 'tree',
148155
branch: 'main', // hardcode the default branch
149156
path: '',
150157
}
151158
}
152159

153160
const folderGroups =
154-
/^\/datasets\/(?<namespace>[^/]+)\/(?<dataset>[^/]+)\/(?<action>tree)\/(?<branch>(refs\/(convert|pr)\/)?[^/]+)(?<path>(\/[^/]+)*)\/?$/.exec(
161+
/^(?<type>\/datasets|\/spaces)\/(?<namespace>[^/]+)\/(?<repo>[^/]+)\/(?<action>tree)\/(?<branch>(refs\/(convert|pr)\/)?[^/]+)(?<path>(\/[^/]+)*)\/?$/.exec(
155162
urlObject.pathname
156163
)?.groups
157164
if (
158-
folderGroups?.namespace !== undefined &&
159-
folderGroups.dataset !== undefined &&
165+
folderGroups?.type !== undefined &&
166+
folderGroups.namespace !== undefined &&
167+
folderGroups.repo !== undefined &&
160168
folderGroups.action !== undefined &&
161169
folderGroups.branch !== undefined &&
162170
folderGroups.path !== undefined &&
163171
folderGroups.branch !== 'refs'
164172
) {
165173
const branch = folderGroups.branch.replace(/\//g, '%2F')
166-
const source = `${urlObject.origin}/datasets/${folderGroups.namespace}/${folderGroups.dataset}/${folderGroups.action}/${branch}${folderGroups.path}`
174+
const source = `${urlObject.origin}${folderGroups.type}/${folderGroups.namespace}/${folderGroups.repo}/${folderGroups.action}/${branch}${folderGroups.path}`
167175
return {
168176
kind: 'directory',
169177
source,
170178
origin: urlObject.origin,
171-
repo: folderGroups.namespace + '/' + folderGroups.dataset,
179+
type: parseRepoType(folderGroups.type.slice(1)),
180+
repo: folderGroups.namespace + '/' + folderGroups.repo,
172181
action: 'tree',
173182
branch,
174183
path: folderGroups.path,
175184
}
176185
}
177186

178187
const fileGroups =
179-
/^\/datasets\/(?<namespace>[^/]+)\/(?<dataset>[^/]+)\/(?<action>blob|resolve)\/(?<branch>(refs\/(convert|pr)\/)?[^/]+)(?<path>(\/[^/]+)+)$/.exec(
188+
/^(?<type>\/datasets|\/spaces)\/(?<namespace>[^/]+)\/(?<repo>[^/]+)\/(?<action>blob|resolve)\/(?<branch>(refs\/(convert|pr)\/)?[^/]+)(?<path>(\/[^/]+)+)$/.exec(
180189
urlObject.pathname
181190
)?.groups
182191
if (
183-
fileGroups?.namespace !== undefined &&
184-
fileGroups.dataset !== undefined &&
192+
fileGroups?.type !== undefined &&
193+
fileGroups.namespace !== undefined &&
194+
fileGroups.repo !== undefined &&
185195
fileGroups.action !== undefined &&
186196
fileGroups.branch !== undefined &&
187197
fileGroups.path !== undefined &&
188198
fileGroups.branch !== 'refs'
189199
) {
190200
const branch = fileGroups.branch.replace(/\//g, '%2F')
191-
const source = `${urlObject.origin}/datasets/${fileGroups.namespace}/${fileGroups.dataset}/${fileGroups.action}/${branch}${fileGroups.path}`
201+
const source = `${urlObject.origin}${fileGroups.type}/${fileGroups.namespace}/${fileGroups.repo}/${fileGroups.action}/${branch}${fileGroups.path}`
192202
return {
193203
kind: 'file',
194204
source,
195205
origin: urlObject.origin,
196-
repo: fileGroups.namespace + '/' + fileGroups.dataset,
206+
type: parseRepoType(fileGroups.type.slice(1)),
207+
repo: fileGroups.namespace + '/' + fileGroups.repo,
197208
action: fileGroups.action === 'blob' ? 'blob' : 'resolve',
198209
branch,
199210
path: fileGroups.path,
200-
resolveUrl: `${urlObject.origin}/datasets/${fileGroups.namespace}/${fileGroups.dataset}/resolve/${branch}${fileGroups.path}`,
211+
resolveUrl: `${urlObject.origin}${fileGroups.type}/${fileGroups.namespace}/${fileGroups.repo}/resolve/${branch}${fileGroups.path}`,
201212
}
202213
}
203214

@@ -236,7 +247,7 @@ export interface RefMetadata extends RefResponse {
236247
* @returns the list of branches, tags, pull requests, and converts
237248
*/
238249
export async function fetchRefsList(
239-
repo: string,
250+
url: HFUrl,
240251
options?: {requestInit?: RequestInit, accessToken?: string}
241252
): Promise<RefMetadata[]> {
242253
if (options?.accessToken && !options.accessToken.startsWith('hf_')) {
@@ -247,7 +258,7 @@ export async function fetchRefsList(
247258
if (options?.accessToken) {
248259
headers.set('Authorization', `Bearer ${options.accessToken}`)
249260
}
250-
const response = await fetch(`https://huggingface.co/api/datasets/${repo}/refs`, { ...options?.requestInit, headers })
261+
const response = await fetch(`https://huggingface.co/api/${getFullName(url)}/refs`, { ...options?.requestInit, headers })
251262
if (!response.ok) {
252263
throw new Error(`HTTP error ${response.status.toString()}`)
253264
}

0 commit comments

Comments
 (0)