Merged (changes from 2 commits)
2 changes: 2 additions & 0 deletions packages/hub/README.md
@@ -57,6 +57,8 @@ await hub.uploadFiles({
},
// Local file URL
pathToFileURL("./pytorch-model.bin"),
// Local folder URL
pathToFileURL("./models"),
// Web URL
new URL("https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json"),
// Path + Web URL
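For context, a complete call exercising the new folder support might look like the following sketch (repo name, token, and paths are placeholders, not the README's exact snippet):

```ts
import { uploadFiles } from "@huggingface/hub";
import { pathToFileURL } from "node:url";

// All values below are placeholders, for illustration only.
await uploadFiles({
	repo: { type: "model", name: "my-user/my-model" },
	accessToken: "hf_...",
	files: [
		// Single local file
		pathToFileURL("./pytorch-model.bin"),
		// Local folder: every file inside is uploaded
		pathToFileURL("./models"),
		// Remote resource
		new URL("https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json"),
	],
});
```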
1 change: 1 addition & 0 deletions packages/hub/package.json
@@ -19,6 +19,7 @@
},
"browser": {
"./src/utils/sha256-node.ts": false,
"./src/utils/sub-paths.ts": false,
"./src/utils/FileBlob.ts": false,
"./src/lib/cache-management.ts": false,
"./src/lib/download-file-to-cache-dir.ts": false,
49 changes: 30 additions & 19 deletions packages/hub/src/lib/commit.ts
@@ -18,10 +18,10 @@ import { promisesQueueStreaming } from "../utils/promisesQueueStreaming";
import { sha256 } from "../utils/sha256";
import { toRepoId } from "../utils/toRepoId";
import { WebBlob } from "../utils/WebBlob";
import { createBlob } from "../utils/createBlob";
import { eventToGenerator } from "../utils/eventToGenerator";
import { base64FromBytes } from "../utils/base64FromBytes";
import { isFrontend } from "../utils/isFrontend";
import { createBlobs } from "../utils/createBlobs";

const CONCURRENT_SHAS = 5;
const CONCURRENT_LFS_UPLOADS = 5;
@@ -73,9 +73,15 @@ export type CommitParams = {
/**
* Whether to use web workers to compute SHA256 hashes.
*
* We load hash-wasm from a CDN inside the web worker. Not sure how to do otherwise and still have a "clean" bundle.
* @default false
*/
useWebWorkers?: boolean | { minSize?: number; poolSize?: number };
/**
* Maximum depth of folders to upload. Files deeper than this will be ignored
*
* @default 5
*/
maxFolderDepth?: number;
Review comment from the PR author:

huggingface_hub uses glob (cc @Wauplin @hanouticelina)

Unfortunately, glob support is still only experimental in Node.js: https://nodejs.org/api/fs.html#fspromisesglobpattern-options

So we use a maxFolderDepth instead (to avoid potential symlink recursions).
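For illustration, here is how the new option could be passed through uploadFiles (a hedged sketch; all values are placeholders):

```ts
import { uploadFiles } from "@huggingface/hub";
import { pathToFileURL } from "node:url";

// Placeholder repo, token and path; maxFolderDepth caps how deep the folder
// traversal goes, so a symlink cycle cannot make the upload recurse indefinitely.
await uploadFiles({
	repo: { type: "model", name: "my-user/my-model" },
	accessToken: "hf_...",
	files: [pathToFileURL("./datasets/raw")],
	maxFolderDepth: 3, // entries nested more than 3 folders deep are ignored
});
```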

/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
@@ -144,27 +150,32 @@ export async function* commitIter(params: CommitParams): AsyncGenerator<CommitPr
}

try {
const allOperations = await Promise.all(
params.operations.map(async (operation) => {
if (operation.operation !== "addOrUpdate") {
return operation;
}
const allOperations = (
await Promise.all(
params.operations.map(async (operation) => {
if (operation.operation !== "addOrUpdate") {
return operation;
}

if (!(operation.content instanceof URL)) {
/** TS trick to enforce `content` to be a `Blob` */
return { ...operation, content: operation.content };
}
if (!(operation.content instanceof URL)) {
/** TS trick to enforce `content` to be a `Blob` */
return { ...operation, content: operation.content };
}

const lazyBlob = await createBlob(operation.content, { fetch: params.fetch });
const lazyBlobs = await createBlobs(operation.content, {
fetch: params.fetch,
maxFolderDepth: params.maxFolderDepth,
});

abortSignal?.throwIfAborted();
abortSignal?.throwIfAborted();

return {
...operation,
content: lazyBlob,
};
})
);
return lazyBlobs.map((blob) => ({
...operation,
content: blob,
}));
})
)
).flat(1);

const gitAttributes = allOperations.filter(isFileOperation).find((op) => op.path === ".gitattributes")?.content;

1 change: 1 addition & 0 deletions packages/hub/src/lib/upload-files-with-progress.ts
@@ -28,6 +28,7 @@ export async function* uploadFilesWithProgress(
isPullRequest?: CommitParams["isPullRequest"];
parentCommit?: CommitParams["parentCommit"];
abortSignal?: CommitParams["abortSignal"];
maxFolderDepth?: CommitParams["maxFolderDepth"];
/**
* Set this to true in order to have progress events for hashing
*/
68 changes: 68 additions & 0 deletions packages/hub/src/lib/upload-files.fs.spec.ts
@@ -0,0 +1,68 @@
import { assert, it, describe } from "vitest";

import { TEST_ACCESS_TOKEN, TEST_HUB_URL, TEST_USER } from "../test/consts";
import type { RepoId } from "../types/public";
import { insecureRandomString } from "../utils/insecureRandomString";
import { createRepo } from "./create-repo";
import { deleteRepo } from "./delete-repo";
import { downloadFile } from "./download-file";
import { uploadFiles } from "./upload-files";
import { mkdir } from "fs/promises";
import { writeFile } from "fs/promises";
import { pathToFileURL } from "url";

describe("uploadFiles", () => {
it("should upload local folder", async () => {
await mkdir("test-folder/sub", { recursive: true });

await writeFile("test-folder/sub/file1.txt", "file1");
await writeFile("test-folder/sub/file2.txt", "file2");

await writeFile("test-folder/file3.txt", "file3");
await writeFile("test-folder/file4.txt", "file4");

const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
const repo = { type: "model", name: repoName } satisfies RepoId;

try {
const result = await createRepo({
accessToken: TEST_ACCESS_TOKEN,
repo,
hubUrl: TEST_HUB_URL,
});

assert.deepStrictEqual(result, {
repoUrl: `${TEST_HUB_URL}/${repoName}`,
});

await uploadFiles({
accessToken: TEST_ACCESS_TOKEN,
repo,
files: [pathToFileURL("test-folder")],
hubUrl: TEST_HUB_URL,
});

let content = await downloadFile({
repo,
path: "test-folder/sub/file1.txt",
hubUrl: TEST_HUB_URL,
});

assert.strictEqual(await content?.text(), "file1");

content = await downloadFile({
repo,
path: "test-folder/file3.txt",
hubUrl: TEST_HUB_URL,
});

assert.strictEqual(await content?.text(), `file3`);
} finally {
await deleteRepo({
repo,
accessToken: TEST_ACCESS_TOKEN,
hubUrl: TEST_HUB_URL,
});
}
});
});
2 changes: 1 addition & 1 deletion packages/hub/src/lib/upload-files.spec.ts
@@ -92,4 +92,4 @@ describe("uploadFiles", () => {
});
}
});
}, 10_000);
});
1 change: 1 addition & 0 deletions packages/hub/src/lib/upload-files.ts
@@ -14,6 +14,7 @@ export function uploadFiles(
parentCommit?: CommitParams["parentCommit"];
fetch?: CommitParams["fetch"];
useWebWorkers?: CommitParams["useWebWorkers"];
maxFolderDepth?: CommitParams["maxFolderDepth"];
abortSignal?: CommitParams["abortSignal"];
} & Partial<CredentialsParams>
): Promise<CommitOutput> {
33 changes: 33 additions & 0 deletions packages/hub/src/utils/createBlobs.ts
@@ -0,0 +1,33 @@
import { WebBlob } from "./WebBlob";
import { isFrontend } from "./isFrontend";

/**
* This function allows retrieving either FileBlobs or a WebBlob from a URL.
*
* From the backend:
* - support local files
* - support local folders
* - support http resources with absolute URLs
*
* From the frontend:
* - support http resources with absolute or relative URLs
*/
export async function createBlobs(url: URL, opts?: { fetch?: typeof fetch; maxFolderDepth?: number }): Promise<Blob[]> {
if (url.protocol === "http:" || url.protocol === "https:") {
return [await WebBlob.create(url, { fetch: opts?.fetch })];
}

if (isFrontend) {
throw new TypeError(`Unsupported URL protocol "${url.protocol}"`);
}

if (url.protocol === "file:") {
const { FileBlob } = await import("./FileBlob");
const { subPaths } = await import("./sub-paths");
const paths = await subPaths(url, opts?.maxFolderDepth);

return Promise.all(paths.map((path) => FileBlob.create(path)));
}

throw new TypeError(`Unsupported URL protocol "${url.protocol}"`);
}
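For illustration, a usage sketch of the two branches (paths and URLs are hypothetical):

```ts
import { pathToFileURL } from "node:url";
import { createBlobs } from "./createBlobs";

// An http(s) URL resolves to a single WebBlob wrapped in an array...
const remote = await createBlobs(new URL("https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json"));

// ...while a local folder (backend only) expands to one FileBlob per file it
// contains, descending at most maxFolderDepth levels.
const local = await createBlobs(pathToFileURL("./models"), { maxFolderDepth: 5 });
```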
24 changes: 24 additions & 0 deletions packages/hub/src/utils/sub-paths.ts
@@ -0,0 +1,24 @@
import { readdir, stat } from "node:fs/promises";

/**
* Recursively retrieves all sub-paths of a given directory up to a specified depth.
*/
export async function subPaths(path: URL, maxDepth = 10): Promise<URL[]> {
const state = await stat(path);
if (!state.isDirectory()) {
return [path];
}

	if (maxDepth <= 0) {
		// Depth budget exhausted: ignore deeper entries (also guards against symlink recursion).
		return [];
	}

	// Ensure the base URL ends with "/" so relative resolution stays inside the directory.
	const base = path.href.endsWith("/") ? path : new URL(path.href + "/");

	const files = await readdir(base, { withFileTypes: true });
	const ret: URL[] = [];
	for (const file of files) {
		const filePath = new URL(file.isDirectory() ? `${file.name}/` : file.name, base);
		if (file.isDirectory()) {
			ret.push(...(await subPaths(filePath, maxDepth - 1)));
		} else {
			ret.push(filePath);
		}
	}

return ret;
}
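A short usage sketch (hypothetical folder):

```ts
import { pathToFileURL } from "node:url";
import { subPaths } from "./sub-paths";

// Returns a flat list of file: URLs, descending at most 5 folder levels.
const files = await subPaths(pathToFileURL("./models"), 5);
console.log(files.map((url) => url.pathname));
```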
1 change: 1 addition & 0 deletions packages/hub/vitest-browser.config.mts
@@ -10,6 +10,7 @@ export default defineConfig({
"src/lib/cache-management.spec.ts",
"src/lib/download-file-to-cache-dir.spec.ts",
"src/lib/snapshot-download.spec.ts",
"src/lib/upload-files.fs.spec.ts",
// Because we use redirect: "manual" in the test
"src/lib/oauth-handle-redirect.spec.ts",
],