Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions packages/hub/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,18 @@ console.log(oauthResult);

Checkout the demo: https://huggingface.co/spaces/huggingfacejs/client-side-oauth

## Hugging face cache

The `@huggingface/hub` package provide basic capabilities to scan the cache directory. Learn more about [Manage huggingface_hub cache-system](https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache).

```ts
import { scanCacheDir } from "@huggingface/hub";

const result = await scanCacheDir();

console.log(result);
```

## Performance considerations

When uploading large files, you may want to run the `commit` calls inside a worker, to offload the sha256 computations.
Expand Down
1 change: 1 addition & 0 deletions packages/hub/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"browser": {
"./src/utils/sha256-node.ts": false,
"./src/utils/FileBlob.ts": false,
"./src/lib/cache-management.ts": false,
"./dist/index.js": "./dist/browser/index.js",
"./dist/index.mjs": "./dist/browser/index.mjs"
},
Expand Down
140 changes: 140 additions & 0 deletions packages/hub/src/lib/cache-management.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import { describe, test, expect, vi, beforeEach } from "vitest";
import {
scanCacheDir,
scanCachedRepo,
scanSnapshotDir,
parseRepoType,
getBlobStat,
type CachedFileInfo,
} from "./cache-management";
import { stat, readdir, realpath, lstat } from "node:fs/promises";
import type { Dirent, Stats } from "node:fs";
import { join } from "node:path";

// Mocks
vi.mock("node:fs/promises");

beforeEach(() => {
vi.resetAllMocks();
vi.restoreAllMocks();
});

describe("scanCacheDir", () => {
test("should throw an error if cacheDir is not a directory", async () => {
vi.mocked(stat).mockResolvedValueOnce({
isDirectory: () => false,
} as Stats);

await expect(scanCacheDir("/fake/dir")).rejects.toThrow("Scan cache expects a directory");
});

test("empty directory should return an empty set of repository and no warnings", async () => {
vi.mocked(stat).mockResolvedValueOnce({
isDirectory: () => true,
} as Stats);

// mock empty cache folder
vi.mocked(readdir).mockResolvedValue([]);

const result = await scanCacheDir("/fake/dir");

// cacheDir must have been read
expect(readdir).toHaveBeenCalledWith("/fake/dir");

expect(result.warnings.length).toBe(0);
expect(result.repos).toHaveLength(0);
expect(result.size).toBe(0);
});
});

describe("scanCachedRepo", () => {
test("should throw an error for invalid repo path", async () => {
await expect(() => {
return scanCachedRepo("/fake/repo_path");
}).rejects.toThrow("Repo path is not a valid HuggingFace cache directory");
});

test("should throw an error if the snapshot folder does not exist", async () => {
vi.mocked(readdir).mockResolvedValue([]);
vi.mocked(stat).mockResolvedValue({
isDirectory: () => false,
} as Stats);

await expect(() => {
return scanCachedRepo("/fake/cacheDir/models--hello-world--name");
}).rejects.toThrow("Snapshots dir doesn't exist in cached repo");
});

test("should properly parse the repository name", async () => {
const repoPath = "/fake/cacheDir/models--hello-world--name";
vi.mocked(readdir).mockResolvedValue([]);
vi.mocked(stat).mockResolvedValue({
isDirectory: () => true,
} as Stats);

const result = await scanCachedRepo(repoPath);
expect(readdir).toHaveBeenCalledWith(join(repoPath, "refs"), {
withFileTypes: true,
});

expect(result.repoId).toBe("hello-world/name");
});
});

describe("scanSnapshotDir", () => {
test("should scan a valid snapshot directory", async () => {
const cachedFiles: CachedFileInfo[] = [];
const blobStats = new Map<string, Stats>();
vi.mocked(readdir).mockResolvedValueOnce([{ name: "file1", isDirectory: () => false } as Dirent]);

vi.mocked(realpath).mockResolvedValueOnce("/fake/realpath");
vi.mocked(lstat).mockResolvedValueOnce({ size: 1024, atimeMs: Date.now(), mtimeMs: Date.now() } as Stats);

await scanSnapshotDir("/fake/revision", cachedFiles, blobStats);

expect(cachedFiles).toHaveLength(1);
expect(blobStats.size).toBe(1);
});
});

describe("getBlobStat", () => {
test("should retrieve blob stat if already cached", async () => {
const blobStats = new Map<string, Stats>([["/fake/blob", { size: 1024 } as Stats]]);
const result = await getBlobStat("/fake/blob", blobStats);

expect(lstat).not.toHaveBeenCalled();
expect(result.size).toBe(1024);
});

test("should fetch and cache blob stat if not cached", async () => {
const blobStats = new Map();
vi.mocked(lstat).mockResolvedValueOnce({ size: 2048 } as Stats);

const result = await getBlobStat("/fake/blob", blobStats);

expect(result.size).toBe(2048);
expect(blobStats.size).toBe(1);
});
});

describe("parseRepoType", () => {
test("should parse models repo type", () => {
expect(parseRepoType("models")).toBe("model");
});

test("should parse model repo type", () => {
expect(parseRepoType("model")).toBe("model");
});

test("should parse dataset repo type", () => {
expect(parseRepoType("dataset")).toBe("dataset");
});

test("should parse space repo type", () => {
expect(parseRepoType("space")).toBe("space");
});

test("should throw an error for invalid repo type", () => {
expect(() => parseRepoType("invalid")).toThrowError("Invalid repo type: invalid");
});
});
Loading
Loading