1- import { listFiles } from '@huggingface/hub'
1+ import { type RepoFullName , type RepoType , listFiles , parseRepoType } from '@huggingface/hub'
22import type { DirSource , FileMetadata , FileSource , SourcePart } from './types.js'
33import { getFileName } from './utils.js'
44
/**
 * Root of the Hugging Face website. Repository-type prefixes such as
 * `datasets/` or `spaces/` are appended by the callers, not included here.
 */
export const baseUrl = 'https://huggingface.co'
66
/**
 * Builds the breadcrumb parts for a parsed Hugging Face URL.
 *
 * The first part always represents the repository at the given branch: its
 * `sourceId` points at the `tree` view while its `text` shows the URL's own
 * action. When the path is non-empty, one part is added per intermediate
 * directory (each linking to its `tree` view, labelled with a trailing `/`),
 * followed by a final part for the last path segment, which links using the
 * URL's own action.
 *
 * @param url - parsed Hugging Face URL (directory or file)
 * @returns the ordered list of breadcrumb parts, root first
 */
function getSourceParts(url: HFUrl): SourcePart[] {
  const repoRoot = `${baseUrl}/${getFullName(url)}`
  const treeRoot = `${repoRoot}/tree/${url.branch}/`

  const root: SourcePart = {
    sourceId: treeRoot,
    text: `${repoRoot}/${url.action}/${url.branch}/`,
  }

  // Drop empty segments so leading/trailing/double slashes do not create parts.
  const segments = url.path.split('/').filter(segment => segment.length > 0)
  const leaf = segments.at(-1)
  if (!leaf) {
    // Empty path: the repository root is the only breadcrumb.
    return [root]
  }

  // Every segment but the last is a directory, hence always a `tree` link.
  const ancestors = segments.slice(0, -1).map((segment, index): SourcePart => ({
    sourceId: `${treeRoot}${segments.slice(0, index + 1).join('/')}`,
    text: segment + '/',
  }))

  const last: SourcePart = {
    sourceId: `${repoRoot}/${url.action}/${url.branch}${url.path}`,
    text: leaf,
  }

  return [root, ...ancestors, last]
}
/**
 * Returns the `tree` URL of a directory, without a trailing slash.
 *
 * @param url - parsed Hugging Face directory URL
 * @returns `<origin>/<full repo name>/tree/<branch><path>` with at most one
 *   trailing `/` removed
 */
function getPrefix(url: DirectoryUrl): string {
  const treeUrl = `${url.origin}/${getFullName(url)}/tree/${url.branch}${url.path}`
  return treeUrl.endsWith('/') ? treeUrl.slice(0, -1) : treeUrl
}
/**
 * Returns the repository name prefixed according to its type.
 *
 * @param url - parsed Hugging Face URL
 * @returns `datasets/<repo>` for datasets, `spaces/<repo>` for spaces, and the
 *   bare `<repo>` for any other repository type
 */
function getFullName(url: HFUrl): RepoFullName {
  switch (url.type) {
    case 'dataset':
      return `datasets/${url.repo}`
    case 'space':
      return `spaces/${url.repo}`
    default:
      return url.repo
  }
}
3236async function fetchFilesList ( url : DirectoryUrl , options ?: { requestInit ?: RequestInit , accessToken ?: string } ) : Promise < FileMetadata [ ] > {
3337 const filesIterator = listFiles ( {
34- repo : `datasets/${ url . repo } ` ,
38+ repo : {
39+ name : url . repo ,
40+ type : url . type ,
41+ } ,
3542 revision : url . branch ,
3643 path : 'path' in url ? url . path . replace ( / ^ \/ / , '' ) : '' , // remove leading slash if any
3744 expand : true ,
@@ -44,7 +51,7 @@ async function fetchFilesList(url: DirectoryUrl, options?: {requestInit?: Reques
4451 eTag : file . lastCommit ?. id ,
4552 size : file . size ,
4653 lastModified : file . lastCommit ?. date ,
47- sourceId : `${ url . origin } /datasets/ ${ url . repo } /${ file . type === 'file' ? 'blob' : 'tree' } /${ url . branch } /${ file . path } ` . replace ( / \/ $ / , '' ) ,
54+ sourceId : `${ url . origin } /${ getFullName ( url ) } /${ file . type === 'file' ? 'blob' : 'tree' } /${ url . branch } /${ file . path } ` . replace ( / \/ $ / , '' ) ,
4855 kind : file . type === 'file' ? 'file' : 'directory' , // 'unknown' is considered as a directory
4956 } )
5057 }
@@ -54,7 +61,7 @@ export function getHuggingFaceSource(sourceId: string, options?: {requestInit?:
5461 try {
5562 const url = parseHuggingFaceUrl ( sourceId )
5663 async function fetchVersions ( ) {
57- const refsList = await fetchRefsList ( url . repo , options )
64+ const refsList = await fetchRefsList ( url , options )
5865 return {
5966 label : 'Branches' ,
6067 versions : refsList . map ( ( { refType, name, ref } ) => {
@@ -65,7 +72,7 @@ export function getHuggingFaceSource(sourceId: string, options?: {requestInit?:
6572 // remove refs/heads/ from the ref name
6673 // e.g. refs/heads/main -> main
6774 const fixedRef = refType === 'branches' ? ref . replace ( / r e f s \/ h e a d s \/ / , '' ) : ref
68- const branchSourceId = `${ url . origin } /datasets/ ${ url . repo } /${ url . kind === 'file' ? 'blob' : 'tree' } /${ fixedRef } ${ url . path } `
75+ const branchSourceId = `${ url . origin } /${ getFullName ( url ) } /${ url . kind === 'file' ? 'blob' : 'tree' } /${ fixedRef } ${ url . path } `
6976 return {
7077 label,
7178 sourceId : branchSourceId ,
@@ -98,25 +105,24 @@ export function getHuggingFaceSource(sourceId: string, options?: {requestInit?:
98105 }
99106}
100107
/** Fields shared by every parsed Hugging Face URL, whatever it points to. */
interface BaseUrl {
  /** URL string the parsed result stands for. */
  source: string
  /** Origin (scheme and host) of the URL. */
  origin: string
  /** Kind of repository the URL belongs to. */
  type: RepoType
  /** Repository identifier in the `<namespace>/<name>` form. */
  repo: string
  /** Revision name; `/` inside a ref is stored encoded as `%2F`. */
  branch: string
  /** Path inside the repository: empty, or starting with `/`. */
  path: string
}

/** A parsed URL that designates a directory (or the repository root). */
export interface DirectoryUrl extends BaseUrl {
  kind: 'directory'
  action: 'tree'
}
110121
/** A parsed URL that designates a single file inside a repository. */
export interface FileUrl extends BaseUrl {
  kind: 'file'
  /** Action segment found in the URL. */
  action: 'resolve' | 'blob'
  /** Same location expressed with the `resolve` action. */
  resolveUrl: string
}

/** Any parsed Hugging Face URL; discriminate on `kind`. */
type HFUrl = DirectoryUrl | FileUrl
@@ -135,69 +141,74 @@ export function parseHuggingFaceUrl(url: string): HFUrl {
135141 throw new Error ( 'Not a Hugging Face URL' )
136142 }
137143
138- const repoGroups = / ^ \/ d a t a s e t s \/ (?< namespace > [ ^ / ] + ) \/ (?< dataset > [ ^ / ] + ) \/ ? $ / . exec (
144+ const repoGroups = / ^ (?< type > \/ d a t a s e t s | \/ s p a c e s ) \/ (?< namespace > [ ^ / ] + ) \/ (?< repo > [ ^ / ] + ) \/ ? $ / . exec (
139145 urlObject . pathname
140146 ) ?. groups
141- if ( repoGroups ?. namespace !== undefined && repoGroups . dataset !== undefined ) {
147+ if ( repoGroups ?. type !== undefined && repoGroups . namespace !== undefined && repoGroups . repo !== undefined ) {
142148 return {
143149 kind : 'directory' ,
144150 source : url ,
145151 origin : urlObject . origin ,
146- repo : repoGroups . namespace + '/' + repoGroups . dataset ,
152+ type : parseRepoType ( repoGroups . type . slice ( 1 ) ) ,
153+ repo : repoGroups . namespace + '/' + repoGroups . repo ,
147154 action : 'tree' ,
148155 branch : 'main' , // hardcode the default branch
149156 path : '' ,
150157 }
151158 }
152159
153160 const folderGroups =
154- / ^ \/ d a t a s e t s \/ (?< namespace > [ ^ / ] + ) \/ (?< dataset > [ ^ / ] + ) \/ (?< action > t r e e ) \/ (?< branch > ( r e f s \/ ( c o n v e r t | p r ) \/ ) ? [ ^ / ] + ) (?< path > ( \/ [ ^ / ] + ) * ) \/ ? $ / . exec (
161+ / ^ (?< type > \/ d a t a s e t s | \/ s p a c e s ) \/ (?< namespace > [ ^ / ] + ) \/ (?< repo > [ ^ / ] + ) \/ (?< action > t r e e ) \/ (?< branch > ( r e f s \/ ( c o n v e r t | p r ) \/ ) ? [ ^ / ] + ) (?< path > ( \/ [ ^ / ] + ) * ) \/ ? $ / . exec (
155162 urlObject . pathname
156163 ) ?. groups
157164 if (
158- folderGroups ?. namespace !== undefined &&
159- folderGroups . dataset !== undefined &&
165+ folderGroups ?. type !== undefined &&
166+ folderGroups . namespace !== undefined &&
167+ folderGroups . repo !== undefined &&
160168 folderGroups . action !== undefined &&
161169 folderGroups . branch !== undefined &&
162170 folderGroups . path !== undefined &&
163171 folderGroups . branch !== 'refs'
164172 ) {
165173 const branch = folderGroups . branch . replace ( / \/ / g, '%2F' )
166- const source = `${ urlObject . origin } /datasets/ ${ folderGroups . namespace } /${ folderGroups . dataset } /${ folderGroups . action } /${ branch } ${ folderGroups . path } `
174+ const source = `${ urlObject . origin } ${ folderGroups . type } / ${ folderGroups . namespace } /${ folderGroups . repo } /${ folderGroups . action } /${ branch } ${ folderGroups . path } `
167175 return {
168176 kind : 'directory' ,
169177 source,
170178 origin : urlObject . origin ,
171- repo : folderGroups . namespace + '/' + folderGroups . dataset ,
179+ type : parseRepoType ( folderGroups . type . slice ( 1 ) ) ,
180+ repo : folderGroups . namespace + '/' + folderGroups . repo ,
172181 action : 'tree' ,
173182 branch,
174183 path : folderGroups . path ,
175184 }
176185 }
177186
178187 const fileGroups =
179- / ^ \/ d a t a s e t s \/ (?< namespace > [ ^ / ] + ) \/ (?< dataset > [ ^ / ] + ) \/ (?< action > b l o b | r e s o l v e ) \/ (?< branch > ( r e f s \/ ( c o n v e r t | p r ) \/ ) ? [ ^ / ] + ) (?< path > ( \/ [ ^ / ] + ) + ) $ / . exec (
188+ / ^ (?< type > \/ d a t a s e t s | \/ s p a c e s ) \/ (?< namespace > [ ^ / ] + ) \/ (?< repo > [ ^ / ] + ) \/ (?< action > b l o b | r e s o l v e ) \/ (?< branch > ( r e f s \/ ( c o n v e r t | p r ) \/ ) ? [ ^ / ] + ) (?< path > ( \/ [ ^ / ] + ) + ) $ / . exec (
180189 urlObject . pathname
181190 ) ?. groups
182191 if (
183- fileGroups ?. namespace !== undefined &&
184- fileGroups . dataset !== undefined &&
192+ fileGroups ?. type !== undefined &&
193+ fileGroups . namespace !== undefined &&
194+ fileGroups . repo !== undefined &&
185195 fileGroups . action !== undefined &&
186196 fileGroups . branch !== undefined &&
187197 fileGroups . path !== undefined &&
188198 fileGroups . branch !== 'refs'
189199 ) {
190200 const branch = fileGroups . branch . replace ( / \/ / g, '%2F' )
191- const source = `${ urlObject . origin } /datasets/ ${ fileGroups . namespace } /${ fileGroups . dataset } /${ fileGroups . action } /${ branch } ${ fileGroups . path } `
201+ const source = `${ urlObject . origin } ${ fileGroups . type } / ${ fileGroups . namespace } /${ fileGroups . repo } /${ fileGroups . action } /${ branch } ${ fileGroups . path } `
192202 return {
193203 kind : 'file' ,
194204 source,
195205 origin : urlObject . origin ,
196- repo : fileGroups . namespace + '/' + fileGroups . dataset ,
206+ type : parseRepoType ( fileGroups . type . slice ( 1 ) ) ,
207+ repo : fileGroups . namespace + '/' + fileGroups . repo ,
197208 action : fileGroups . action === 'blob' ? 'blob' : 'resolve' ,
198209 branch,
199210 path : fileGroups . path ,
200- resolveUrl : `${ urlObject . origin } /datasets/ ${ fileGroups . namespace } /${ fileGroups . dataset } /resolve/${ branch } ${ fileGroups . path } ` ,
211+ resolveUrl : `${ urlObject . origin } ${ fileGroups . type } / ${ fileGroups . namespace } /${ fileGroups . repo } /resolve/${ branch } ${ fileGroups . path } ` ,
201212 }
202213 }
203214
@@ -236,7 +247,7 @@ export interface RefMetadata extends RefResponse {
236247 * @returns the list of branches, tags, pull requests, and converts
237248 */
238249export async function fetchRefsList (
239- repo : string ,
250+ url : HFUrl ,
240251 options ?: { requestInit ?: RequestInit , accessToken ?: string }
241252) : Promise < RefMetadata [ ] > {
242253 if ( options ?. accessToken && ! options . accessToken . startsWith ( 'hf_' ) ) {
@@ -247,7 +258,7 @@ export async function fetchRefsList(
247258 if ( options ?. accessToken ) {
248259 headers . set ( 'Authorization' , `Bearer ${ options . accessToken } ` )
249260 }
250- const response = await fetch ( `https://huggingface.co/api/datasets/ ${ repo } /refs` , { ...options ?. requestInit , headers } )
261+ const response = await fetch ( `https://huggingface.co/api/${ getFullName ( url ) } /refs` , { ...options ?. requestInit , headers } )
251262 if ( ! response . ok ) {
252263 throw new Error ( `HTTP error ${ response . status . toString ( ) } ` )
253264 }
0 commit comments