Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/hub/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ await hub.uploadFiles({
},
// Local file URL
pathToFileURL("./pytorch-model.bin"),
// Local folder URL
pathToFileURL("./models"),
// Web URL
new URL("https://huggingface.co/xlm-roberta-base/resolve/main/tokenizer.json"),
// Path + Web URL
Expand Down
1 change: 1 addition & 0 deletions packages/hub/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
},
"browser": {
"./src/utils/sha256-node.ts": false,
"./src/utils/sub-paths.ts": false,
"./src/utils/FileBlob.ts": false,
"./src/lib/cache-management.ts": false,
"./src/lib/download-file-to-cache-dir.ts": false,
Expand Down
54 changes: 33 additions & 21 deletions packages/hub/src/lib/commit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ import { promisesQueueStreaming } from "../utils/promisesQueueStreaming";
import { sha256 } from "../utils/sha256";
import { toRepoId } from "../utils/toRepoId";
import { WebBlob } from "../utils/WebBlob";
import { createBlob } from "../utils/createBlob";
import { eventToGenerator } from "../utils/eventToGenerator";
import { base64FromBytes } from "../utils/base64FromBytes";
import { isFrontend } from "../utils/isFrontend";
import { createBlobs } from "../utils/createBlobs";

const CONCURRENT_SHAS = 5;
const CONCURRENT_LFS_UPLOADS = 5;
Expand Down Expand Up @@ -73,9 +73,15 @@ export type CommitParams = {
/**
* Whether to use web workers to compute SHA256 hashes.
*
* We load hash-wasm from a CDN inside the web worker. Not sure how to do otherwise and still have a "clean" bundle.
* @default false
*/
useWebWorkers?: boolean | { minSize?: number; poolSize?: number };
/**
* Maximum depth of folders to upload. Files deeper than this will be ignored.
*
* @default 10
*/
maxFolderDepth?: number;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

huggingface_hub uses glob cc @Wauplin @hanouticelina

Unfortunately support is only experimental for nodejs: https://nodejs.org/api/fs.html#fspromisesglobpattern-options

So using a maxFolderDepth (to avoid potential symlink recursions)

/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
Expand Down Expand Up @@ -144,27 +150,33 @@ export async function* commitIter(params: CommitParams): AsyncGenerator<CommitPr
}

try {
const allOperations = await Promise.all(
params.operations.map(async (operation) => {
if (operation.operation !== "addOrUpdate") {
return operation;
}

if (!(operation.content instanceof URL)) {
/** TS trick to enforce `content` to be a `Blob` */
return { ...operation, content: operation.content };
}

const lazyBlob = await createBlob(operation.content, { fetch: params.fetch });
const allOperations = (
await Promise.all(
params.operations.map(async (operation) => {
if (operation.operation !== "addOrUpdate") {
return operation;
}

abortSignal?.throwIfAborted();
if (!(operation.content instanceof URL)) {
/** TS trick to enforce `content` to be a `Blob` */
return { ...operation, content: operation.content };
}

return {
...operation,
content: lazyBlob,
};
})
);
const lazyBlobs = await createBlobs(operation.content, operation.path, {
fetch: params.fetch,
maxFolderDepth: params.maxFolderDepth,
});

abortSignal?.throwIfAborted();

return lazyBlobs.map((blob) => ({
...operation,
content: blob.blob,
path: blob.path,
}));
})
)
).flat(1);

const gitAttributes = allOperations.filter(isFileOperation).find((op) => op.path === ".gitattributes")?.content;

Expand Down
2 changes: 1 addition & 1 deletion packages/hub/src/lib/create-repo.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,4 @@ describe("createRepo", () => {
credentials: { accessToken: TEST_ACCESS_TOKEN },
});
});
}, 10_000);
});
2 changes: 1 addition & 1 deletion packages/hub/src/lib/delete-files.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,4 @@ describe("deleteFiles", () => {
});
}
});
}, 10_000);
});
1 change: 1 addition & 0 deletions packages/hub/src/lib/upload-files-with-progress.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export async function* uploadFilesWithProgress(
isPullRequest?: CommitParams["isPullRequest"];
parentCommit?: CommitParams["parentCommit"];
abortSignal?: CommitParams["abortSignal"];
maxFolderDepth?: CommitParams["maxFolderDepth"];
/**
* Set this to true in order to have progress events for hashing
*/
Expand Down
71 changes: 71 additions & 0 deletions packages/hub/src/lib/upload-files.fs.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { assert, it, describe } from "vitest";

import { TEST_ACCESS_TOKEN, TEST_HUB_URL, TEST_USER } from "../test/consts";
import type { RepoId } from "../types/public";
import { insecureRandomString } from "../utils/insecureRandomString";
import { createRepo } from "./create-repo";
import { deleteRepo } from "./delete-repo";
import { downloadFile } from "./download-file";
import { uploadFiles } from "./upload-files";
import { mkdir } from "fs/promises";
import { writeFile } from "fs/promises";
import { pathToFileURL } from "url";
import { tmpdir } from "os";

// Integration test: uploads a local directory through a `file:` URL and checks
// that nested files end up in the repo under the folder's name.
describe("uploadFiles", () => {
	it("should upload local folder", async () => {
		const folder = `${tmpdir()}/test-folder`;

		await mkdir(`${folder}/sub`, { recursive: true });

		// Local fixtures, written one after the other: two nested files, two top-level files.
		const fixtures = [
			{ file: "sub/file1.txt", body: "file1" },
			{ file: "sub/file2.txt", body: "file2" },
			{ file: "file3.txt", body: "file3" },
			{ file: "file4.txt", body: "file4" },
		];
		for (const fixture of fixtures) {
			await writeFile(`${folder}/${fixture.file}`, fixture.body);
		}

		const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
		const repo = { type: "model", name: repoName } satisfies RepoId;

		try {
			const created = await createRepo({
				accessToken: TEST_ACCESS_TOKEN,
				repo,
				hubUrl: TEST_HUB_URL,
			});
			assert.deepStrictEqual(created, {
				repoUrl: `${TEST_HUB_URL}/${repoName}`,
			});

			await uploadFiles({
				accessToken: TEST_ACCESS_TOKEN,
				repo,
				files: [pathToFileURL(folder)],
				hubUrl: TEST_HUB_URL,
			});

			// One nested and one top-level file are read back to validate the upload.
			const expectations = [
				{ path: "test-folder/sub/file1.txt", text: "file1" },
				{ path: "test-folder/file3.txt", text: `file3` },
			];
			for (const expected of expectations) {
				const content = await downloadFile({
					repo,
					path: expected.path,
					hubUrl: TEST_HUB_URL,
				});

				assert.strictEqual(await content?.text(), expected.text);
			}
		} finally {
			// Always remove the temporary repo, even when an assertion failed.
			await deleteRepo({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
			});
		}
	});
});
2 changes: 1 addition & 1 deletion packages/hub/src/lib/upload-files.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,4 @@ describe("uploadFiles", () => {
});
}
});
}, 10_000);
});
1 change: 1 addition & 0 deletions packages/hub/src/lib/upload-files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export function uploadFiles(
parentCommit?: CommitParams["parentCommit"];
fetch?: CommitParams["fetch"];
useWebWorkers?: CommitParams["useWebWorkers"];
maxFolderDepth?: CommitParams["maxFolderDepth"];
abortSignal?: CommitParams["abortSignal"];
} & Partial<CredentialsParams>
): Promise<CommitOutput> {
Expand Down
48 changes: 48 additions & 0 deletions packages/hub/src/utils/createBlobs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { WebBlob } from "./WebBlob";
import { isFrontend } from "./isFrontend";

/**
 * Resolves a URL into one or more lazily-read blobs, each paired with its destination path.
 *
 * - `http:` / `https:` URLs yield a single `WebBlob` stored at `destPath`.
 * - `file:` URLs (rejected when `isFrontend` is set) are expanded with `subPaths`:
 *   - a single file yields one `FileBlob` stored at `destPath`;
 *   - a folder yields one `FileBlob` per file found, stored at `destPath/<relative path>`.
 *
 * @param url Location of the resource to read.
 * @param destPath Destination path for a single file, or destination prefix for a folder.
 * @param opts.fetch Custom fetch implementation forwarded to `WebBlob.create`.
 * @param opts.maxFolderDepth Depth limit forwarded to `subPaths` for `file:` URLs.
 * @returns The list of `{ path, blob }` pairs to upload.
 * @throws TypeError when the URL protocol is not supported in the current environment.
 */
export async function createBlobs(
	url: URL,
	destPath: string,
	opts?: { fetch?: typeof fetch; maxFolderDepth?: number }
): Promise<Array<{ path: string; blob: Blob }>> {
	const isHttp = url.protocol === "http:" || url.protocol === "https:";
	if (isHttp) {
		const webBlob = await WebBlob.create(url, { fetch: opts?.fetch });
		return [{ path: destPath, blob: webBlob }];
	}

	// Beyond this point only local files are handled, and only outside of the frontend.
	if (isFrontend || url.protocol !== "file:") {
		throw new TypeError(`Unsupported URL protocol "${url.protocol}"`);
	}

	// Loaded on demand: both modules are disabled in the package's "browser" field.
	const { FileBlob } = await import("./FileBlob");
	const { subPaths } = await import("./sub-paths");
	const entries = await subPaths(url, opts?.maxFolderDepth);

	// `subPaths` reports a lone "." entry when the URL points to a regular file.
	const isSingleFile = entries.length === 1 && entries[0].relativePath === ".";
	if (isSingleFile) {
		const fileBlob = await FileBlob.create(url);
		return [{ path: destPath, blob: fileBlob }];
	}

	const pending = entries.map(async (entry) => {
		// Strip a trailing "/." and replace each "//" with "/" (e.g. when `destPath` ends with a slash).
		const path = `${destPath}/${entry.relativePath}`.replace(/\/[.]$/, "").replaceAll("//", "/");
		const blob = await FileBlob.create(new URL(entry.path));
		return { path, blob };
	});

	return Promise.all(pending);
}
39 changes: 39 additions & 0 deletions packages/hub/src/utils/sub-paths.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { mkdir, writeFile } from "fs/promises";
import { tmpdir } from "os";
import { describe, expect, it } from "vitest";
import { subPaths } from "./sub-paths";
import { pathToFileURL } from "url";

// Checks that `subPaths` lists both top-level and nested files of a directory.
describe("sub-paths", () => {
	it("should retrieve all sub-paths of a directory", async () => {
		const root = `${tmpdir()}/test-dir`;

		await mkdir(`${root}/sub`, { recursive: true });

		// Fixtures are written sequentially: nested files first, then top-level ones.
		const fixtures = [
			{ file: "sub/file1.txt", body: "file1" },
			{ file: "sub/file2.txt", body: "file2" },
			{ file: "file3.txt", body: "file3" },
			{ file: "file4.txt", body: "file4" },
		];
		for (const fixture of fixtures) {
			await writeFile(`${root}/${fixture.file}`, fixture.body);
		}

		const listed = await subPaths(pathToFileURL(`${root}`));

		// Expected listing order: top-level files, then the content of `sub/`.
		const expected = ["file3.txt", "file4.txt", "sub/file1.txt", "sub/file2.txt"].map((relativePath) => ({
			path: pathToFileURL(`${root}/${relativePath}`),
			relativePath,
		}));

		expect(listed).toEqual(expected);
	});
});
38 changes: 38 additions & 0 deletions packages/hub/src/utils/sub-paths.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { readdir, stat } from "node:fs/promises";
import { fileURLToPath, pathToFileURL } from "node:url";

/**
 * Recursively lists the files found under `path`, descending at most `maxDepth`
 * levels of sub-directories.
 *
 * - When `path` is not a directory, a single entry is returned whose
 *   `relativePath` is `"."`.
 * - Otherwise one entry is returned per non-directory item. `relativePath` is
 *   relative to `path` and always uses `/` as separator.
 *
 * Depth semantics: items located directly inside `path` are always listed.
 * Entering a sub-directory consumes one unit of `maxDepth`; once the budget is
 * exhausted, deeper sub-directories are skipped and their content is ignored.
 * This bounds the recursion regardless of how deep the tree is.
 *
 * @param path `file:` URL of the file or directory to inspect.
 * @param maxDepth Number of nested directory levels to descend into. `0` lists
 *   only the items directly inside `path`. Non-positive or `NaN` values are
 *   treated as `0`.
 * @returns The listed entries, in the order reported by `readdir`.
 * @throws Rejects with the underlying `fs` error when `path` cannot be
 *   stat'ed or read.
 */
export async function subPaths(
	path: URL,
	maxDepth = 10
): Promise<
	Array<{
		path: URL;
		relativePath: string;
	}>
> {
	const state = await stat(path);
	if (!state.isDirectory()) {
		return [{ path, relativePath: "." }];
	}

	const files = await readdir(path, { withFileTypes: true });
	const ret: Array<{ path: URL; relativePath: string }> = [];
	for (const file of files) {
		const filePath = pathToFileURL(fileURLToPath(path) + "/" + file.name);

		if (!file.isDirectory()) {
			ret.push({ path: filePath, relativePath: file.name });
			continue;
		}

		// Depth budget exhausted: do not descend any further.
		// Written as `!(x > 0)` so that NaN also stops the recursion.
		if (!(maxDepth > 0)) {
			continue;
		}

		// Pushed one by one rather than spread, so very large folders cannot
		// exceed the engine's argument-count limit.
		for (const subPath of await subPaths(filePath, maxDepth - 1)) {
			ret.push({
				...subPath,
				relativePath: `${file.name}/${subPath.relativePath}`,
			});
		}
	}

	return ret;
}
2 changes: 2 additions & 0 deletions packages/hub/vitest-browser.config.mts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ export default defineConfig({
...configDefaults.exclude,
"src/utils/FileBlob.spec.ts",
"src/utils/symlink.spec.ts",
"src/utils/sub-paths.spec.ts",
"src/lib/cache-management.spec.ts",
"src/lib/download-file-to-cache-dir.spec.ts",
"src/lib/snapshot-download.spec.ts",
"src/lib/upload-files.fs.spec.ts",
// Because we use redirect: "manual" in the test
"src/lib/oauth-handle-redirect.spec.ts",
],
Expand Down
Loading