
Commit 75b0d15

docs: shorter model URI
1 parent 507d804 commit 75b0d15

5 files changed (+36, -9 lines)


docs/cli/pull.md

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ If a file already exists and its size matches the expected size, it will not be
 
 The supported URI schemes are:
 - **HTTP:** `https://`, `http://`
+- **Hugging Face:** `hf:<user>/<model>:<quant>` (`:<quant>` is optional, [but recommended](../guide/downloading-models.md#hf-scheme-specify-quant))
 - **Hugging Face:** `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)
 
 Learn more about using model URIs in the [Downloading Models guide](../guide/downloading-models.md#model-uris).
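
For context, the two Hugging Face forms can also be used programmatically; here is a minimal sketch with `resolveModelFile`, using the same illustrative repo and quant as the guide (the models directory is an assumption for this example):

```typescript
// Sketch only: the repo, quant, and file path are the illustrative values from
// the docs; the models directory is an assumption for this example.
import path from "path";
import {fileURLToPath} from "url";
import {resolveModelFile} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const modelsDirectory = path.join(__dirname, "models");

// Shorter form: repository + quantization level
const fromQuant = await resolveModelFile(
    "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M",
    modelsDirectory
);

// Explicit form: repository + file path within the repository
const fromFile = await resolveModelFile(
    "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf",
    modelsDirectory
);

console.log({fromQuant, fromFile});
```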

docs/guide/downloading-models.md

Lines changed: 17 additions & 5 deletions
@@ -75,14 +75,19 @@ You can reference models using a URI instead of their full download URL when usi
 
 When downloading a model from a URI, the model files will be prefixed with a corresponding adaptation of the URI.
 
-To reference a model from Hugging Face, you can use the scheme
-<br/>
-`hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional).
+To reference a model from Hugging Face, you can use one of these schemes:
+* `hf:<user>/<model>:<quant>` (`:<quant>` is optional, [but recommended](#hf-scheme-specify-quant))
+* `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)
 
-Here's an example usage of the Hugging Face URI scheme:
+Here are example usages of the Hugging Face URI scheme:
+::: code-group
+```[With quant]
+hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M
 ```
+```[Specific file]
 hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
 ```
+:::
 
 When using a URI to reference a model,
 it's recommended [to add it to your `package.json` file](#cli) to ensure it's downloaded when running `npm install`,
@@ -98,7 +103,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const modelsDirectory = path.join(__dirname, "models");
 
 const modelPath = await resolveModelFile(
-    "hf:user/model/model-file.gguf",
+    "hf:user/model:quant",
     modelsDirectory
 );
 
@@ -114,6 +119,13 @@ When a file is being downloaded, the download progress is shown in the console b
 Set the [`cli`](../api/type-aliases/ResolveModelFileOptions#cli) option to `false` to disable this behavior.
 :::
 
+::: tip TIP {#hf-scheme-specify-quant}
+When using the `hf:<user>/<model>:<quant>` scheme, always specify the quantization level in the URI (`:<quant>`).
+<br/>
+Doing this allows the resolver to resolve to a local model file without checking the model metadata on Hugging Face first,
+so it will be resolved offline and faster.
+:::
+
 ## Downloading Gated Models From Hugging Face {#hf-token}
 Some models on Hugging Face are "gated", meaning they require a manual consent from you before you can download them.
 
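
A minimal sketch of the behavior the new tip describes, assuming the placeholder URI from the docs and a local models directory: once a matching file exists locally, a quant-qualified URI lets the resolver skip the Hugging Face metadata lookup.

```typescript
// Sketch only: "hf:user/model:quant" is the docs' placeholder URI, and the
// models directory is an assumption for this example.
import path from "path";
import {fileURLToPath} from "url";
import {resolveModelFile} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const modelsDirectory = path.join(__dirname, "models");

// The first resolution may download the file (showing CLI progress, as described above)
const firstPath = await resolveModelFile("hf:user/model:quant", modelsDirectory);

// A repeated resolution with the same quant-qualified URI can be satisfied by the
// local file, without checking the model metadata on Hugging Face first
const secondPath = await resolveModelFile("hf:user/model:quant", modelsDirectory);

console.log(firstPath === secondPath); // expected: true
```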

src/cli/utils/resolveModelRecommendationFileOptions.ts

Lines changed: 4 additions & 0 deletions
@@ -3,6 +3,10 @@ import {resolveModelDestination} from "../../utils/resolveModelDestination.js";
 export type ModelURI = `${
     `http://${string}/${string}` |
     `https://${string}/${string}` |
+    `hf:${string}/${string}:${string}` |
+    `huggingface:${string}/${string}:${string}` |
+    `hf.co/${string}/${string}:${string}` |
+    `huggingface.co/${string}/${string}:${string}` |
     `hf:${string}/${string}/${string}` |
     `huggingface:${string}/${string}/${string}`
 }${
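
As a quick illustration of why both branches are needed, here is a simplified standalone sketch of the template-literal pattern (not the full `ModelURI` type): the new variants require a `:` after the model name, while the existing ones require a second `/`.

```typescript
// Simplified sketch; HfQuantUri / HfFilePathUri are illustrative names, not
// types exported by the library.
type HfQuantUri = `hf:${string}/${string}:${string}`;
type HfFilePathUri = `hf:${string}/${string}/${string}`;
type HfUri = HfQuantUri | HfFilePathUri;

// Matches the new quant variant (":" after the model name)
const shortForm: HfUri = "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M";

// Matches the existing file-path variant (a second "/")
const fileForm: HfUri = "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf";

// @ts-expect-error - neither a ":<quant>" suffix nor a "/<file-path>" is present
const invalid: HfUri = "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF";

console.log(shortForm, fileForm, invalid);
```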

src/utils/createModelDownloader.ts

Lines changed: 13 additions & 3 deletions
@@ -17,6 +17,7 @@ export type ModelDownloaderOptions = ({
      *
      * The supported URI schemes are:
      * - **HTTP:** `https://`, `http://`
+     * - **Hugging Face:** `hf:<user>/<model>:<quant>` (`:<quant>` is optional, but recommended)
      * - **Hugging Face:** `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)
      */
     modelUri: string
@@ -83,6 +84,7 @@ export type ModelDownloaderOptions = ({
  *
  * The supported URI schemes are:
  * - **HTTP:** `https://`, `http://`
+ * - **Hugging Face:** `hf:<user>/<model>:<quant>` (`:<quant>` is optional, but recommended)
  * - **Hugging Face:** `hf:<user>/<model>/<file-path>#<branch>` (`#<branch>` is optional)
  * @example
  * ```typescript
@@ -112,7 +114,7 @@ export type ModelDownloaderOptions = ({
  * const __dirname = path.dirname(fileURLToPath(import.meta.url));
  *
  * const downloader = await createModelDownloader({
- *     modelUri: "hf:user/model/model-file.gguf",
+ *     modelUri: "hf:user/model:quant",
  *     dirPath: path.join(__dirname, "models")
  * });
  * const modelPath = await downloader.download();
@@ -153,7 +155,11 @@ export function createModelDownloader(options: ModelDownloaderOptions) {
  *         dirPath: path.join(__dirname, "models")
  *     }),
  *     createModelDownloader({
- *         modelUri: "hf:user/model/model2.gguf",
+ *         modelUri: "hf:user/model2:quant",
+ *         dirPath: path.join(__dirname, "models")
+ *     }),
+ *     createModelDownloader({
+ *         modelUri: "hf:user/model/model3.gguf",
  *         dirPath: path.join(__dirname, "models")
  *     })
  * ];
@@ -162,7 +168,8 @@ export function createModelDownloader(options: ModelDownloaderOptions) {
  * });
  * const [
  *     model1Path,
- *     model2Path
+ *     model2Path,
+ *     model3Path
  * ] = await combinedDownloader.download();
  *
  * const llama = await getLlama();
@@ -172,6 +179,9 @@ export function createModelDownloader(options: ModelDownloaderOptions) {
  * const model2 = await llama.loadModel({
  *     modelPath: model2Path!
  * });
+ * const model3 = await llama.loadModel({
+ *     modelPath: model3Path!
+ * });
  * ```
  */
 export async function combineModelDownloaders(
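
For reference, here is a condensed sketch assembling the updated JSDoc example into one self-contained snippet (the model URIs are the placeholders from the example, and the options argument to `combineModelDownloaders` is omitted here):

```typescript
// Sketch only: URIs and directory layout follow the placeholders used in the
// JSDoc example above.
import path from "path";
import {fileURLToPath} from "url";
import {createModelDownloader, combineModelDownloaders, getLlama} from "node-llama-cpp";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

const downloaders = [
    createModelDownloader({
        modelUri: "hf:user/model1:quant",
        dirPath: path.join(__dirname, "models")
    }),
    createModelDownloader({
        modelUri: "hf:user/model2:quant",
        dirPath: path.join(__dirname, "models")
    }),
    createModelDownloader({
        modelUri: "hf:user/model/model3.gguf",
        dirPath: path.join(__dirname, "models")
    })
];
const combinedDownloader = await combineModelDownloaders(downloaders);
const [
    model1Path,
    model2Path,
    model3Path
] = await combinedDownloader.download();

const llama = await getLlama();
const model1 = await llama.loadModel({modelPath: model1Path!});
const model2 = await llama.loadModel({modelPath: model2Path!});
const model3 = await llama.loadModel({modelPath: model3Path!});
```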

src/utils/resolveModelFile.ts

Lines changed: 1 addition & 1 deletion
@@ -111,7 +111,7 @@ export type ResolveModelFileOptions = {
  *
  * // resolve a model from Hugging Face to the models directory
  * const modelPath = await resolveModelFile(
- *     "hf:user/model/model-file.gguf",
+ *     "hf:user/model:quant",
  *     path.join(__dirname, "models")
  * );
  *
