Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/cli/pull.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ If a file already exists and its size matches the expected size, it will not be

The supported URI schemes are:
- **HTTP:** `https://`, `http://`
- **Hugging Face:** `hf:<user>/<model>:<quant>` (`:<quant>` is optional, [but recommended](../guide/downloading-models.md#hf-scheme-specify-quant))
- **Hugging Face:** `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)

Learn more about using model URIs in the [Downloading Models guide](../guide/downloading-models.md#model-uris).
Expand Down
22 changes: 17 additions & 5 deletions docs/guide/downloading-models.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,19 @@ You can reference models using a URI instead of their full download URL when usi

When downloading a model from a URI, the model files will be prefixed with a corresponding adaptation of the URI.

To reference a model from Hugging Face, you can use the scheme
<br/>
`hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional).
To reference a model from Hugging Face, you can use one of these schemes:
* `hf:<user>/<model>:<quant>` (`:<quant>` is optional, [but recommended](#hf-scheme-specify-quant))
* `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)

Here's an example usage of the Hugging Face URI scheme:
Here are example usages of the Hugging Face URI scheme:
::: code-group
```[With quant]
hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M
```
```[Specific file]
hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
```
:::

When using a URI to reference a model,
it's recommended [to add it to your `package.json` file](#cli) to ensure it's downloaded when running `npm install`,
Expand All @@ -98,7 +103,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
const modelsDirectory = path.join(__dirname, "models");

const modelPath = await resolveModelFile(
"hf:user/model/model-file.gguf",
"hf:user/model:quant",
modelsDirectory
);

Expand All @@ -114,6 +119,13 @@ When a file is being downloaded, the download progress is shown in the console b
Set the [`cli`](../api/type-aliases/ResolveModelFileOptions#cli) option to `false` to disable this behavior.
:::

::: tip TIP {#hf-scheme-specify-quant}
When using the `hf:<user>/<model>:<quant>` scheme, always specify the quantization level in the URI (`:<quant>`).
<br/>
Doing this allows the resolver to find a matching local model file without first checking the model metadata on Hugging Face,
so the URI can be resolved faster and even while offline.
:::

## Downloading Gated Models From Hugging Face {#hf-token}
Some models on Hugging Face are "gated", meaning they require manual consent from you before you can download them.

Expand Down
3 changes: 2 additions & 1 deletion src/cli/commands/PullCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ export const PullCommand: CommandModule<object, PullCommand> = {
deleteTempFileOnCancel: noTempFile,
skipExisting: !override,
fileName: filename || undefined,
parallelDownloads: parallel
parallelDownloads: parallel,
_showUriResolvingProgress: !noProgress
});

if (!override && downloader.totalFiles === 1 && await fs.pathExists(downloader.entrypointFilePath)) {
Expand Down
26 changes: 20 additions & 6 deletions src/cli/commands/inspect/commands/InspectEstimateCommand.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import process from "process";
import {CommandModule} from "yargs";
import chalk from "chalk";
import fs from "fs-extra";
import {readGgufFileInfo} from "../../../../gguf/readGgufFileInfo.js";
import {resolveHeaderFlag} from "../../../utils/resolveHeaderFlag.js";
import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
Expand All @@ -17,9 +19,11 @@ import {Llama} from "../../../../bindings/Llama.js";
import {getGgufFileTypeName} from "../../../../gguf/utils/getGgufFileTypeName.js";
import {getPrettyBuildGpuName} from "../../../../bindings/consts.js";
import withOra from "../../../../utils/withOra.js";
import {resolveModelDestination} from "../../../../utils/resolveModelDestination.js";
import {resolveModelArgToFilePathOrUrl} from "../../../../utils/resolveModelDestination.js";
import {printModelDestination} from "../../../utils/printModelDestination.js";
import {toBytes} from "../../../utils/toBytes.js";
import {printDidYouMeanUri} from "../../../utils/resolveCommandGgufPath.js";
import {isModelUri} from "../../../../utils/parseModelUri.js";

type InspectEstimateCommand = {
modelPath: string,
Expand Down Expand Up @@ -121,13 +125,23 @@ export const InspectEstimateCommand: CommandModule<object, InspectEstimateComman
if (contextSizeArg === -1) contextSizeArg = undefined;
if (contextSizeArg === -2) contextSizeArg = "train";

const resolvedModelDestination = resolveModelDestination(ggufPath);
const resolvedGgufPath = resolvedModelDestination.type == "file"
? resolvedModelDestination.path
: resolvedModelDestination.url;

const headers = resolveHeaderFlag(headerArg);

const [resolvedModelDestination, resolvedGgufPath] = isModelUri(ggufPath)
? await withOra({
loading: chalk.blue("Resolving model URI"),
success: chalk.blue("Resolved model URI"),
fail: chalk.blue("Failed to resolve model URI"),
noSuccessLiveStatus: true
}, () => resolveModelArgToFilePathOrUrl(ggufPath, headers))
: await resolveModelArgToFilePathOrUrl(ggufPath, headers);

if (resolvedModelDestination.type === "file" && !await fs.pathExists(resolvedGgufPath)) {
console.error(`${chalk.red("File does not exist:")} ${resolvedGgufPath}`);
printDidYouMeanUri(ggufPath);
process.exit(1);
}

const llama = gpu == null
? await getLlama("lastBuild", {
logLevel: LlamaLogLevel.error
Expand Down
24 changes: 18 additions & 6 deletions src/cli/commands/inspect/commands/InspectGgufCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ import {resolveHeaderFlag} from "../../../utils/resolveHeaderFlag.js";
import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
import {documentationPageUrls} from "../../../../config.js";
import withOra from "../../../../utils/withOra.js";
import {resolveModelDestination} from "../../../../utils/resolveModelDestination.js";
import {resolveModelArgToFilePathOrUrl} from "../../../../utils/resolveModelDestination.js";
import {printModelDestination} from "../../../utils/printModelDestination.js";
import {getGgufMetadataKeyValue} from "../../../../gguf/utils/getGgufMetadataKeyValue.js";
import {GgufTensorInfo} from "../../../../gguf/types/GgufTensorInfoTypes.js";
import {toBytes} from "../../../utils/toBytes.js";
import {printDidYouMeanUri} from "../../../utils/resolveCommandGgufPath.js";
import {isModelUri} from "../../../../utils/parseModelUri.js";

type InspectGgufCommand = {
modelPath: string,
Expand Down Expand Up @@ -91,13 +93,23 @@ export const InspectGgufCommand: CommandModule<object, InspectGgufCommand> = {
async handler({
modelPath: ggufPath, header: headerArg, key, noSplice, fullTensorInfo, fullMetadataArrays, plainJson, outputToJsonFile
}: InspectGgufCommand) {
const resolvedModelDestination = resolveModelDestination(ggufPath);
const resolvedGgufPath = resolvedModelDestination.type == "file"
? resolvedModelDestination.path
: resolvedModelDestination.url;

const headers = resolveHeaderFlag(headerArg);

const [resolvedModelDestination, resolvedGgufPath] = (!plainJson && isModelUri(ggufPath))
? await withOra({
loading: chalk.blue("Resolving model URI"),
success: chalk.blue("Resolved model URI"),
fail: chalk.blue("Failed to resolve model URI"),
noSuccessLiveStatus: true
}, () => resolveModelArgToFilePathOrUrl(ggufPath, headers))
: await resolveModelArgToFilePathOrUrl(ggufPath, headers);

if (resolvedModelDestination.type === "file" && !await fs.pathExists(resolvedGgufPath)) {
console.error(`${chalk.red("File does not exist:")} ${resolvedGgufPath}`);
printDidYouMeanUri(ggufPath);
process.exit(1);
}

if (!plainJson)
printModelDestination(resolvedModelDestination);

Expand Down
Loading
Loading