Skip to content

Commit 72322aa

Browse files
committed
handle folders in uploadFiles
1 parent c10c090 commit 72322aa

File tree

7 files changed

+92
-19
lines changed

7 files changed

+92
-19
lines changed

packages/hub/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ await hub.uploadFiles({
5757
},
5858
// Local file URL
5959
pathToFileURL("./pytorch-model.bin"),
60+
// Local folder URL
61+
pathToFileURL("./models"),
6062
// Web URL
6163
new URL("https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json"),
6264
// Path + Web URL

packages/hub/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
},
2020
"browser": {
2121
"./src/utils/sha256-node.ts": false,
22+
"./src/utils/sub-paths.ts": false,
2223
"./src/utils/FileBlob.ts": false,
2324
"./src/lib/cache-management.ts": false,
2425
"./src/lib/download-file-to-cache-dir.ts": false,

packages/hub/src/lib/commit.ts

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ import { promisesQueueStreaming } from "../utils/promisesQueueStreaming";
1818
import { sha256 } from "../utils/sha256";
1919
import { toRepoId } from "../utils/toRepoId";
2020
import { WebBlob } from "../utils/WebBlob";
21-
import { createBlob } from "../utils/createBlob";
2221
import { eventToGenerator } from "../utils/eventToGenerator";
2322
import { base64FromBytes } from "../utils/base64FromBytes";
2423
import { isFrontend } from "../utils/isFrontend";
24+
import { createBlobs } from "../utils/createBlobs";
2525

2626
const CONCURRENT_SHAS = 5;
2727
const CONCURRENT_LFS_UPLOADS = 5;
@@ -73,9 +73,15 @@ export type CommitParams = {
7373
/**
7474
* Whether to use web workers to compute SHA256 hashes.
7575
*
76-
* We load hash-wasm from a CDN inside the web worker. Not sure how to do otherwise and still have a "clean" bundle.
76+
* @default false
7777
*/
7878
useWebWorkers?: boolean | { minSize?: number; poolSize?: number };
79+
/**
80+
* Maximum depth of folders to upload. Files deeper than this will be ignored
81+
*
82+
* @default 10
83+
*/
84+
maxFolderDepth?: number;
7985
/**
8086
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
8187
*/
@@ -144,27 +150,32 @@ export async function* commitIter(params: CommitParams): AsyncGenerator<CommitPr
144150
}
145151

146152
try {
147-
const allOperations = await Promise.all(
148-
params.operations.map(async (operation) => {
149-
if (operation.operation !== "addOrUpdate") {
150-
return operation;
151-
}
153+
const allOperations = (
154+
await Promise.all(
155+
params.operations.map(async (operation) => {
156+
if (operation.operation !== "addOrUpdate") {
157+
return operation;
158+
}
152159

153-
if (!(operation.content instanceof URL)) {
154-
/** TS trick to enforce `content` to be a `Blob` */
155-
return { ...operation, content: operation.content };
156-
}
160+
if (!(operation.content instanceof URL)) {
161+
/** TS trick to enforce `content` to be a `Blob` */
162+
return { ...operation, content: operation.content };
163+
}
157164

158-
const lazyBlob = await createBlob(operation.content, { fetch: params.fetch });
165+
const lazyBlobs = await createBlobs(operation.content, {
166+
fetch: params.fetch,
167+
maxFolderDepth: params.maxFolderDepth,
168+
});
159169

160-
abortSignal?.throwIfAborted();
170+
abortSignal?.throwIfAborted();
161171

162-
return {
163-
...operation,
164-
content: lazyBlob,
165-
};
166-
})
167-
);
172+
return lazyBlobs.map((blob) => ({
173+
...operation,
174+
content: blob,
175+
}));
176+
})
177+
)
178+
).flat(1);
168179

169180
const gitAttributes = allOperations.filter(isFileOperation).find((op) => op.path === ".gitattributes")?.content;
170181

packages/hub/src/lib/upload-files-with-progress.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export async function* uploadFilesWithProgress(
2828
isPullRequest?: CommitParams["isPullRequest"];
2929
parentCommit?: CommitParams["parentCommit"];
3030
abortSignal?: CommitParams["abortSignal"];
31+
maxFolderDepth?: CommitParams["maxFolderDepth"];
3132
/**
3233
* Set this to true in order to have progress events for hashing
3334
*/

packages/hub/src/lib/upload-files.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export function uploadFiles(
1414
parentCommit?: CommitParams["parentCommit"];
1515
fetch?: CommitParams["fetch"];
1616
useWebWorkers?: CommitParams["useWebWorkers"];
17+
maxFolderDepth?: CommitParams["maxFolderDepth"];
1718
abortSignal?: CommitParams["abortSignal"];
1819
} & Partial<CredentialsParams>
1920
): Promise<CommitOutput> {
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import { WebBlob } from "./WebBlob";
2+
import { isFrontend } from "./isFrontend";
3+
4+
/**
 * Turns a URL into the list of blobs it designates.
 *
 * - `http:` / `https:` URLs yield a single blob obtained from `WebBlob.create`,
 *   using `opts.fetch` when provided.
 * - `file:` URLs are only accepted when `isFrontend` is falsy. The URL is expanded
 *   with `subPaths` (a single file, or the content of a folder limited by
 *   `opts.maxFolderDepth`) and one blob is created per returned path with
 *   `FileBlob.create`.
 *
 * The node-only helpers are imported dynamically so that they are only loaded
 * when a `file:` URL is actually processed.
 *
 * @param url - Location of the resource(s) to wrap.
 * @param opts - Optional custom `fetch` and folder depth limit.
 * @returns One blob per resolved resource.
 * @throws TypeError when the URL protocol is not supported in the current environment.
 */
export async function createBlobs(url: URL, opts?: { fetch?: typeof fetch; maxFolderDepth?: number }): Promise<Blob[]> {
	const { protocol } = url;

	switch (protocol) {
		case "http:":
		case "https:": {
			const remoteBlob = await WebBlob.create(url, { fetch: opts?.fetch });
			return [remoteBlob];
		}
	}

	// Anything that is not http(s) needs filesystem access: never available from the
	// frontend, and only implemented for the `file:` scheme elsewhere.
	const isLocalFile = protocol === "file:";
	if (isFrontend || !isLocalFile) {
		throw new TypeError(`Unsupported URL protocol "${protocol}"`);
	}

	const fileBlobModule = await import("./FileBlob");
	const subPathsModule = await import("./sub-paths");

	const resolvedPaths = await subPathsModule.subPaths(url, opts?.maxFolderDepth);
	const pendingBlobs = resolvedPaths.map((resolvedPath) => fileBlobModule.FileBlob.create(resolvedPath));

	return Promise.all(pendingBlobs);
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import { readdir, stat } from "node:fs/promises";
import { join } from "node:path";
import { fileURLToPath, pathToFileURL } from "node:url";
2+
3+
/**
 * Recursively lists the files located under a `file:` URL, up to a given folder depth.
 *
 * If `path` is not a directory it is returned as the only element, whatever `maxDepth` is.
 * If it is a directory, its entries are read: non-directory entries are returned as
 * `file:` URLs and sub-directories are explored with a depth budget reduced by one.
 *
 * The directory passed in counts as depth 1: with `maxDepth = 1` only its direct files
 * are returned, with `maxDepth = 2` the files of its direct sub-folders are included
 * too, and so on. A directory reached with no remaining budget contributes no file.
 *
 * @param path - `file:` URL of a file or a directory (with or without trailing slash).
 * @param maxDepth - Maximum number of nested folder levels to read. Defaults to 10.
 * @returns The `file:` URLs of every file found within the depth limit.
 */
export async function subPaths(path: URL, maxDepth = 10): Promise<URL[]> {
	const state = await stat(path);
	if (!state.isDirectory()) {
		return [path];
	}

	// Depth budget exhausted: files below this folder are ignored.
	// Written as a negated `>` so that a NaN budget stops the recursion instead of never ending it.
	if (!(maxDepth > 0)) {
		return [];
	}

	// Children are built from the filesystem path rather than with `new URL(name, path)`:
	// relative URL resolution drops the last segment of a base without trailing slash
	// (`file:///x/models` + `a.bin` => `file:///x/a.bin`) and would interpret `#`, `?`
	// or `%` inside a file name as URL syntax.
	const directory = fileURLToPath(path);

	const entries = await readdir(path, { withFileTypes: true });
	const ret: URL[] = [];
	for (const entry of entries) {
		const entryUrl = pathToFileURL(join(directory, entry.name));
		if (entry.isDirectory()) {
			for (const nested of await subPaths(entryUrl, maxDepth - 1)) {
				ret.push(nested);
			}
		} else {
			ret.push(entryUrl);
		}
	}

	return ret;
}

0 commit comments

Comments
 (0)