Skip to content

Commit 21b48fa

Browse files
authored
handle folders in uploadFiles (#1398)
If we want to expose a CLI for the JS client... cc @Pierrci for viz
1 parent eaa1b9c commit 21b48fa

13 files changed

+239
-24
lines changed

packages/hub/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ await hub.uploadFiles({
5757
},
5858
// Local file URL
5959
pathToFileURL("./pytorch-model.bin"),
60+
// Local folder URL
61+
pathToFileURL("./models"),
6062
// Web URL
6163
new URL("https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json"),
6264
// Path + Web URL

packages/hub/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
},
2020
"browser": {
2121
"./src/utils/sha256-node.ts": false,
22+
"./src/utils/sub-paths.ts": false,
2223
"./src/utils/FileBlob.ts": false,
2324
"./src/lib/cache-management.ts": false,
2425
"./src/lib/download-file-to-cache-dir.ts": false,

packages/hub/src/lib/commit.ts

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ import { promisesQueueStreaming } from "../utils/promisesQueueStreaming";
1818
import { sha256 } from "../utils/sha256";
1919
import { toRepoId } from "../utils/toRepoId";
2020
import { WebBlob } from "../utils/WebBlob";
21-
import { createBlob } from "../utils/createBlob";
2221
import { eventToGenerator } from "../utils/eventToGenerator";
2322
import { base64FromBytes } from "../utils/base64FromBytes";
2423
import { isFrontend } from "../utils/isFrontend";
24+
import { createBlobs } from "../utils/createBlobs";
2525

2626
const CONCURRENT_SHAS = 5;
2727
const CONCURRENT_LFS_UPLOADS = 5;
@@ -73,9 +73,15 @@ export type CommitParams = {
7373
/**
7474
* Whether to use web workers to compute SHA256 hashes.
7575
*
76-
* We load hash-wasm from a CDN inside the web worker. Not sure how to do otherwise and still have a "clean" bundle.
76+
* @default false
7777
*/
7878
useWebWorkers?: boolean | { minSize?: number; poolSize?: number };
79+
/**
80+
* Maximum depth of folders to upload. Files deeper than this will be ignored.
81+
*
82+
* @default 5
83+
*/
84+
maxFolderDepth?: number;
7985
/**
8086
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
8187
*/
@@ -144,27 +150,33 @@ export async function* commitIter(params: CommitParams): AsyncGenerator<CommitPr
144150
}
145151

146152
try {
147-
const allOperations = await Promise.all(
148-
params.operations.map(async (operation) => {
149-
if (operation.operation !== "addOrUpdate") {
150-
return operation;
151-
}
152-
153-
if (!(operation.content instanceof URL)) {
154-
/** TS trick to enforce `content` to be a `Blob` */
155-
return { ...operation, content: operation.content };
156-
}
157-
158-
const lazyBlob = await createBlob(operation.content, { fetch: params.fetch });
153+
const allOperations = (
154+
await Promise.all(
155+
params.operations.map(async (operation) => {
156+
if (operation.operation !== "addOrUpdate") {
157+
return operation;
158+
}
159159

160-
abortSignal?.throwIfAborted();
160+
if (!(operation.content instanceof URL)) {
161+
/** TS trick to enforce `content` to be a `Blob` */
162+
return { ...operation, content: operation.content };
163+
}
161164

162-
return {
163-
...operation,
164-
content: lazyBlob,
165-
};
166-
})
167-
);
165+
const lazyBlobs = await createBlobs(operation.content, operation.path, {
166+
fetch: params.fetch,
167+
maxFolderDepth: params.maxFolderDepth,
168+
});
169+
170+
abortSignal?.throwIfAborted();
171+
172+
return lazyBlobs.map((blob) => ({
173+
...operation,
174+
content: blob.blob,
175+
path: blob.path,
176+
}));
177+
})
178+
)
179+
).flat(1);
168180

169181
const gitAttributes = allOperations.filter(isFileOperation).find((op) => op.path === ".gitattributes")?.content;
170182

packages/hub/src/lib/create-repo.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,4 +100,4 @@ describe("createRepo", () => {
100100
credentials: { accessToken: TEST_ACCESS_TOKEN },
101101
});
102102
});
103-
}, 10_000);
103+
});

packages/hub/src/lib/delete-files.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,4 @@ describe("deleteFiles", () => {
7878
});
7979
}
8080
});
81-
}, 10_000);
81+
});

packages/hub/src/lib/upload-files-with-progress.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export async function* uploadFilesWithProgress(
2828
isPullRequest?: CommitParams["isPullRequest"];
2929
parentCommit?: CommitParams["parentCommit"];
3030
abortSignal?: CommitParams["abortSignal"];
31+
maxFolderDepth?: CommitParams["maxFolderDepth"];
3132
/**
3233
* Set this to true in order to have progress events for hashing
3334
*/
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import { assert, it, describe } from "vitest";
2+
3+
import { TEST_ACCESS_TOKEN, TEST_HUB_URL, TEST_USER } from "../test/consts";
4+
import type { RepoId } from "../types/public";
5+
import { insecureRandomString } from "../utils/insecureRandomString";
6+
import { createRepo } from "./create-repo";
7+
import { deleteRepo } from "./delete-repo";
8+
import { downloadFile } from "./download-file";
9+
import { uploadFiles } from "./upload-files";
10+
import { mkdir } from "fs/promises";
11+
import { writeFile } from "fs/promises";
12+
import { pathToFileURL } from "url";
13+
import { tmpdir } from "os";
14+
15+
describe("uploadFiles", () => {
16+
it("should upload local folder", async () => {
17+
const tmpDir = tmpdir();
18+
19+
await mkdir(`${tmpDir}/test-folder/sub`, { recursive: true });
20+
21+
await writeFile(`${tmpDir}/test-folder/sub/file1.txt`, "file1");
22+
await writeFile(`${tmpDir}/test-folder/sub/file2.txt`, "file2");
23+
24+
await writeFile(`${tmpDir}/test-folder/file3.txt`, "file3");
25+
await writeFile(`${tmpDir}/test-folder/file4.txt`, "file4");
26+
27+
const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
28+
const repo = { type: "model", name: repoName } satisfies RepoId;
29+
30+
try {
31+
const result = await createRepo({
32+
accessToken: TEST_ACCESS_TOKEN,
33+
repo,
34+
hubUrl: TEST_HUB_URL,
35+
});
36+
37+
assert.deepStrictEqual(result, {
38+
repoUrl: `${TEST_HUB_URL}/${repoName}`,
39+
});
40+
41+
await uploadFiles({
42+
accessToken: TEST_ACCESS_TOKEN,
43+
repo,
44+
files: [pathToFileURL(`${tmpDir}/test-folder`)],
45+
hubUrl: TEST_HUB_URL,
46+
});
47+
48+
let content = await downloadFile({
49+
repo,
50+
path: "test-folder/sub/file1.txt",
51+
hubUrl: TEST_HUB_URL,
52+
});
53+
54+
assert.strictEqual(await content?.text(), "file1");
55+
56+
content = await downloadFile({
57+
repo,
58+
path: "test-folder/file3.txt",
59+
hubUrl: TEST_HUB_URL,
60+
});
61+
62+
assert.strictEqual(await content?.text(), `file3`);
63+
} finally {
64+
await deleteRepo({
65+
repo,
66+
accessToken: TEST_ACCESS_TOKEN,
67+
hubUrl: TEST_HUB_URL,
68+
});
69+
}
70+
});
71+
});

packages/hub/src/lib/upload-files.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,4 @@ describe("uploadFiles", () => {
9292
});
9393
}
9494
});
95-
}, 10_000);
95+
});

packages/hub/src/lib/upload-files.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export function uploadFiles(
1414
parentCommit?: CommitParams["parentCommit"];
1515
fetch?: CommitParams["fetch"];
1616
useWebWorkers?: CommitParams["useWebWorkers"];
17+
maxFolderDepth?: CommitParams["maxFolderDepth"];
1718
abortSignal?: CommitParams["abortSignal"];
1819
} & Partial<CredentialsParams>
1920
): Promise<CommitOutput> {
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import { WebBlob } from "./WebBlob";
2+
import { isFrontend } from "./isFrontend";
3+
4+
/**
5+
* This function retrieves one or more blobs (FileBlob or WebBlob) from a URL, each paired with its destination path.
6+
*
7+
* From the backend:
8+
* - support local files
9+
* - support local folders
10+
* - support http resources with absolute URLs
11+
*
12+
* From the frontend:
13+
* - support http resources with absolute or relative URLs
14+
*/
15+
export async function createBlobs(
16+
url: URL,
17+
destPath: string,
18+
opts?: { fetch?: typeof fetch; maxFolderDepth?: number }
19+
): Promise<Array<{ path: string; blob: Blob }>> {
20+
if (url.protocol === "http:" || url.protocol === "https:") {
21+
const blob = await WebBlob.create(url, { fetch: opts?.fetch });
22+
return [{ path: destPath, blob }];
23+
}
24+
25+
if (isFrontend) {
26+
throw new TypeError(`Unsupported URL protocol "${url.protocol}"`);
27+
}
28+
29+
if (url.protocol === "file:") {
30+
const { FileBlob } = await import("./FileBlob");
31+
const { subPaths } = await import("./sub-paths");
32+
const paths = await subPaths(url, opts?.maxFolderDepth);
33+
34+
if (paths.length === 1 && paths[0].relativePath === ".") {
35+
const blob = await FileBlob.create(url);
36+
return [{ path: destPath, blob }];
37+
}
38+
39+
return Promise.all(
40+
paths.map(async (path) => ({
41+
path: `${destPath}/${path.relativePath}`.replace(/\/[.]$/, "").replaceAll("//", "/"),
42+
blob: await FileBlob.create(new URL(path.path)),
43+
}))
44+
);
45+
}
46+
47+
throw new TypeError(`Unsupported URL protocol "${url.protocol}"`);
48+
}

0 commit comments

Comments
 (0)