Skip to content

Commit 35e6f50

Browse files
authored
feat: automatically adapt to current free VRAM state (#182)
* feat: read tensor info from `gguf` files
* feat: `inspect gguf` command
* feat: `inspect measure` command
* feat: `readGgufFileInfo` function
* feat: GGUF file info on `LlamaModel`
* feat: estimate VRAM usage of the model and context with certain options to adapt to current VRAM state and set great defaults for `gpuLayers` and `contextSize`. No manual configuration of those options is needed anymore to maximize performance
* feat: `JinjaTemplateChatWrapper`
* feat: use the `tokenizer.chat_template` header from the `gguf` file when available - use it to find a better specialized chat wrapper or use `JinjaTemplateChatWrapper` with it as a fallback
* feat: improve `resolveChatWrapper`
* feat: simplify generation CLI commands: `chat`, `complete`, `infill`
* feat: read GPU device names
* feat: get token type
* refactor: gguf
* test: separate gguf tests to model dependent and model independent tests
* test: switch to new vitest test signature
* fix: use the new `llama.cpp` CUDA flag
* fix: improve chat wrappers tokenization
* fix: bugs
1 parent f3b7f81 commit 35e6f50

File tree

146 files changed

+10767
-2632
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

146 files changed

+10767
-2632
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ node_modules
1111
/.eslintcache
1212
/.vitepress/.cache
1313
/test/.models
14+
/test/temp
1415
/coverage
1516

1617
/llama/compile_commands.json
@@ -20,6 +21,8 @@ node_modules
2021
/llama/lastBuild.json
2122
/llama/gitRelease.bundle
2223
/llama/.temp
24+
/llama/.idea
25+
/llama/cmake-build-debug
2326
/llama/localBuilds
2427
/llama/Release
2528
/llama/Debug

.vitepress/config.ts

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,22 @@ export default defineConfig({
108108
pageData.frontmatter.editLink = false;
109109
pageData.frontmatter.lastUpdated = false;
110110
}
111+
112+
let canonicalUrl = hostname + pageData.relativePath;
113+
if (canonicalUrl.endsWith("/index.html"))
114+
canonicalUrl = canonicalUrl.slice(0, -"index.html".length);
115+
if (canonicalUrl.endsWith("/index.md"))
116+
canonicalUrl = canonicalUrl.slice(0, -"index.md".length);
117+
else if (canonicalUrl.endsWith(".html"))
118+
canonicalUrl = canonicalUrl.slice(0, -".html".length);
119+
else if (canonicalUrl.endsWith(".md"))
120+
canonicalUrl = canonicalUrl.slice(0, -".md".length);
121+
122+
pageData.frontmatter.head ??= [];
123+
pageData.frontmatter.head.push([
124+
"link",
125+
{rel: "canonical", href: canonicalUrl}
126+
])
111127
},
112128
themeConfig: {
113129
editLink: {
@@ -183,7 +199,16 @@ export default defineConfig({
183199
{text: "Download", link: "/download"},
184200
{text: "Complete", link: "/complete"},
185201
{text: "Infill", link: "/infill"},
186-
{text: "Inspect", link: "/inspect"},
202+
{
203+
text: "Inspect",
204+
link: "/inspect",
205+
collapsed: true,
206+
items: [
207+
{text: "GPU", link: "/inspect/gpu"},
208+
{text: "GGUF", link: "/inspect/gguf"},
209+
{text: "Measure", link: "/inspect/measure"},
210+
]
211+
},
187212
{text: "Build", link: "/build"},
188213
{text: "Clear", link: "/clear"}
189214
]
@@ -302,7 +327,7 @@ function orderClasses(sidebar: typeof typedocSidebar) {
302327
items: []
303328
};
304329
(classes.items as DefaultTheme.SidebarItem[]).push(LlamaTextGroup);
305-
const LlamaTextGroupItemsOrder = ["SpecialToken", "BuiltinSpecialToken"];
330+
const LlamaTextGroupItemsOrder = ["SpecialTokensText", "SpecialToken"];
306331

307332
groupItems(
308333
classes.items,
@@ -327,7 +352,7 @@ function orderTypes(sidebar: typeof typedocSidebar) {
327352
(item) => (
328353
item.text === "BatchItem" ||
329354
item.text === "CustomBatchingDispatchSchedule" ||
330-
item.text === "CustomBatchingPrioritizeStrategy" ||
355+
item.text === "CustomBatchingPrioritizationStrategy" ||
331356
item.text === "PrioritizedBatchItem"
332357
),
333358
{collapsed: false}

.vitepress/utils/getCommandHtmlDoc.ts

Lines changed: 81 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,54 @@ import {cliBinName, npxRunPrefix} from "../../src/config.js";
44
import {buildHtmlTable} from "./buildHtmlTable.js";
55
import {buildHtmlHeading} from "./buildHtmlHeading.js";
66

7-
export async function getCommandHtmlDoc(command: CommandModule<any, any>, cliName: string = cliBinName) {
8-
const title = cliName + " " + (command.command ?? "");
7+
export async function getCommandHtmlDoc(command: CommandModule<any, any>, {
8+
cliName = cliBinName,
9+
parentCommand,
10+
subCommandsParentPageLink
11+
}: {
12+
cliName?: string,
13+
parentCommand?: CommandModule<any, any>,
14+
subCommandsParentPageLink?: string
15+
} = {}) {
16+
const currentCommandCliCommand = resolveCommandCliCommand(command);
17+
const resolvedParentCommandCliCommand = resolveCommandCliCommand(parentCommand);
18+
const title = cliName + " " + (resolvedParentCommandCliCommand ?? "<command>").replace("<command>", currentCommandCliCommand ?? "");
919
const description = command.describe ?? "";
10-
const optionGroups = await getOptionsGroupFromCommand(command);
20+
const {subCommands, optionGroups} = await parseCommandDefinition(command);
1121

1222
let res = "";
1323

24+
if (subCommands.length > 0) {
25+
res += buildHtmlHeading("h2", htmlEscape("Commands"), "commands");
26+
27+
res += buildHtmlTable(
28+
[
29+
"Command",
30+
"Description"
31+
].map(htmlEscape),
32+
subCommands
33+
.map((subCommand) => {
34+
if (subCommand.command == null || subCommand.describe === false)
35+
return null;
36+
37+
const resolvedCommandCliCommand = resolveCommandCliCommand(subCommand) ?? "";
38+
const commandPageLink = resolveCommandPageLink(subCommand);
39+
40+
let cliCommand = resolvedCommandCliCommand;
41+
cliCommand = (currentCommandCliCommand ?? "<command>").replace("<command>", cliCommand);
42+
43+
if (parentCommand != null)
44+
cliCommand = (resolvedParentCommandCliCommand ?? "<command>").replace("<command>", cliCommand);
45+
46+
return [
47+
`<a href="${subCommandsParentPageLink != null ? (subCommandsParentPageLink + "/") : ""}${commandPageLink}"><code>` + htmlEscape(cliName + " " + cliCommand) + "</code></a>",
48+
htmlEscape(String(subCommand.describe ?? ""))
49+
];
50+
})
51+
.filter((row): row is string[] => row != null)
52+
);
53+
}
54+
1455
if (optionGroups.length !== 0) {
1556
res += buildHtmlHeading("h2", htmlEscape("Options"), "options");
1657

@@ -37,7 +78,10 @@ export async function getCommandHtmlDoc(command: CommandModule<any, any>, cliNam
3778
}
3879

3980

40-
async function getOptionsGroupFromCommand(command: CommandModule<any, any>): Promise<OptionsGroup[]> {
81+
async function parseCommandDefinition(command: CommandModule<any, any>): Promise<{
82+
subCommands: CommandModule<any, any>[],
83+
optionGroups: OptionsGroup[]
84+
}> {
4185
const yargsStub = getYargsStub();
4286
function getYargsStub() {
4387
function option(name: string, option: Options) {
@@ -57,10 +101,16 @@ async function getOptionsGroupFromCommand(command: CommandModule<any, any>): Pro
57101
return yargsStub;
58102
}
59103

60-
return {option};
104+
function command(subCommand: CommandModule<any, any>) {
105+
subCommands.push(subCommand);
106+
return yargsStub;
107+
}
108+
109+
return {option, command};
61110
}
62111

63112
const options: Record<string, {name: string, option: Options}[]> = {};
113+
const subCommands: CommandModule<any, any>[] = [];
64114
const groups: string[] = [];
65115

66116
if (command.builder instanceof Function)
@@ -97,10 +147,13 @@ async function getOptionsGroupFromCommand(command: CommandModule<any, any>): Pro
97147
return 0;
98148
});
99149

100-
return groups.map((group) => ({
101-
name: normalizeGroupName(group),
102-
options: options[group]!
103-
}));
150+
return {
151+
subCommands,
152+
optionGroups: groups.map((group) => ({
153+
name: normalizeGroupName(group),
154+
options: options[group]!
155+
}))
156+
};
104157
}
105158

106159
function normalizeGroupName(groupName: string): string {
@@ -156,8 +209,12 @@ function renderOptionsGroupOptionsTable(options: {name: string, option: Options}
156209

157210
let optionDescription: string[] = option.description != null ? [htmlEscape(option.description)] : [];
158211

159-
if (option.default != null) {
160-
optionDescription.push(`(${htmlEscape("default: ")}<code>${htmlEscape(option.default)}</code>)`);
212+
const hasDefaultDescription = option.defaultDescription != null && option.defaultDescription.trim().length > 0;
213+
if (option.default != null || hasDefaultDescription) {
214+
if (hasDefaultDescription && option.defaultDescription != null)
215+
optionDescription.push(`<span style="opacity: 0.72">(${htmlEscape("default: ")}${htmlEscape(option.defaultDescription.trim())})</span>`);
216+
else
217+
optionDescription.push(`<span style="opacity: 0.72">(${htmlEscape("default: ")}<code>${htmlEscape(option.default)}</code>)</span>`);
161218
}
162219

163220
if (option.type != null) {
@@ -184,6 +241,19 @@ function renderOptionsGroupOptionsTable(options: {name: string, option: Options}
184241
return buildHtmlTable(tableHeaders, tableRows);
185242
}
186243

244+
function resolveCommandCliCommand(command?: CommandModule<any, any>) {
245+
if (command == null)
246+
return undefined;
247+
248+
return command.command instanceof Array
249+
? command.command[0]
250+
: command.command;
251+
}
252+
253+
function resolveCommandPageLink(command: CommandModule<any, any>) {
254+
return resolveCommandCliCommand(command)?.split(" ")?.[0];
255+
}
256+
187257
type OptionsGroup = {
188258
name: string,
189259
options: Array<{

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
## Features
2121
* Run a text generation model locally on your machine
2222
* Metal, CUDA and Vulkan support
23-
* Pre-built binaries are provided, with a fallback to building from source without `node-gyp` or Python
23+
* Pre-built binaries are provided, with a fallback to building from source _**without**_ `node-gyp` or Python
2424
* Chat with a model using a chat wrapper
2525
* Use the CLI to chat with a model without writing any code
2626
* Up-to-date with the latest version of `llama.cpp`. Download and compile the latest release with a single CLI command.

docs/guide/cli/cli.data.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,17 @@ import {BuildCommand} from "../../../src/cli/commands/BuildCommand.js";
44
import {ChatCommand} from "../../../src/cli/commands/ChatCommand.js";
55
import {CompleteCommand} from "../../../src/cli/commands/CompleteCommand.js";
66
import {InfillCommand} from "../../../src/cli/commands/InfillCommand.js";
7-
import {InspectCommand} from "../../../src/cli/commands/InspectCommand.js";
7+
import {InspectCommand} from "../../../src/cli/commands/inspect/InspectCommand.js";
8+
import {InspectGpuCommand} from "../../../src/cli/commands/inspect/commands/InspectGpuCommand.js";
9+
import {InspectGgufCommand} from "../../../src/cli/commands/inspect/commands/InspectGgufCommand.js";
810
import {DownloadCommand} from "../../../src/cli/commands/DownloadCommand.js";
911
import {ClearCommand} from "../../../src/cli/commands/ClearCommand.js";
1012
import {htmlEscape} from "../../../.vitepress/utils/htmlEscape.js";
1113
import {cliBinName, npxRunPrefix} from "../../../src/config.js";
1214
import {buildHtmlHeading} from "../../../.vitepress/utils/buildHtmlHeading.js";
1315
import {buildHtmlTable} from "../../../.vitepress/utils/buildHtmlTable.js";
1416
import {setIsInDocumentationMode} from "../../../src/state.js";
17+
import {InspectMeasureCommand} from "../../../src/cli/commands/inspect/commands/InspectMeasureCommand.js";
1518

1619
export default {
1720
async load() {
@@ -31,7 +34,20 @@ export default {
3134
chat: await getCommandHtmlDoc(ChatCommand),
3235
complete: await getCommandHtmlDoc(CompleteCommand),
3336
infill: await getCommandHtmlDoc(InfillCommand),
34-
inspect: await getCommandHtmlDoc(InspectCommand),
37+
inspect: {
38+
index: await getCommandHtmlDoc(InspectCommand, {
39+
subCommandsParentPageLink: "inspect"
40+
}),
41+
gpu: await getCommandHtmlDoc(InspectGpuCommand, {
42+
parentCommand: InspectCommand
43+
}),
44+
gguf: await getCommandHtmlDoc(InspectGgufCommand, {
45+
parentCommand: InspectCommand
46+
}),
47+
measure: await getCommandHtmlDoc(InspectMeasureCommand, {
48+
parentCommand: InspectCommand
49+
})
50+
},
3551
download: await getCommandHtmlDoc(DownloadCommand),
3652
build: await getCommandHtmlDoc(BuildCommand),
3753
clear: await getCommandHtmlDoc(ClearCommand)

docs/guide/cli/inspect.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ outline: deep
55

66
<script setup lang="ts">
77
import {data as docs} from "./cli.data.js";
8-
const commandDoc = docs.inspect;
8+
const commandDoc = docs.inspect.index;
99
</script>
1010

1111
{{commandDoc.description}}

docs/guide/cli/inspect/gguf.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
outline: deep
3+
---
4+
# `inspect gguf` command
5+
6+
<script setup lang="ts">
7+
import {data as docs} from "../cli.data.js";
8+
const commandDoc = docs.inspect.gguf;
9+
</script>
10+
11+
{{commandDoc.description}}
12+
13+
## Usage
14+
```shell-vue
15+
{{commandDoc.usage}}
16+
```
17+
<div v-html="commandDoc.options"></div>

docs/guide/cli/inspect/gpu.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
outline: deep
3+
---
4+
# `inspect gpu` command
5+
6+
<script setup lang="ts">
7+
import {data as docs} from "../cli.data.js";
8+
const commandDoc = docs.inspect.gpu;
9+
</script>
10+
11+
{{commandDoc.description}}
12+
13+
## Usage
14+
```shell-vue
15+
{{commandDoc.usage}}
16+
```
17+
<div v-html="commandDoc.options"></div>

docs/guide/cli/inspect/measure.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
outline: deep
3+
---
4+
# `inspect measure` command
5+
6+
<script setup lang="ts">
7+
import {data as docs} from "../cli.data.js";
8+
const commandDoc = docs.inspect.measure;
9+
</script>
10+
11+
{{commandDoc.description}}
12+
13+
## Usage
14+
```shell-vue
15+
{{commandDoc.usage}}
16+
```
17+
<div v-html="commandDoc.options"></div>

llama/CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,19 @@ include_directories("gpuInfo")
2929
include_directories("llama.cpp")
3030
include_directories("./llama.cpp/common")
3131

32-
if (LLAMA_CUBLAS)
32+
if (LLAMA_CUDA)
3333
cmake_minimum_required(VERSION 3.17)
3434

3535
find_package(CUDAToolkit)
3636
if (CUDAToolkit_FOUND)
37-
message(STATUS "Using cuBLAS for GPU info")
37+
message(STATUS "Using CUDA for GPU info")
3838

3939
enable_language(CUDA)
4040

4141
set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/cuda-gpu-info.h)
4242
set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/cuda-gpu-info.cu)
4343

44-
add_compile_definitions(GPU_INFO_USE_CUBLAS)
44+
add_compile_definitions(GPU_INFO_USE_CUDA)
4545

4646
if (LLAMA_STATIC)
4747
set(LLAMA_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
@@ -60,7 +60,7 @@ if (LLAMA_CUBLAS)
6060
endif()
6161
endif()
6262
else()
63-
message(FATAL_ERROR "cuBLAS was not found")
63+
message(FATAL_ERROR "CUDA was not found")
6464
endif()
6565
endif()
6666

@@ -100,7 +100,7 @@ if (LLAMA_HIPBLAS)
100100

101101
if (${hipblas_FOUND} AND ${hip_FOUND})
102102
message(STATUS "Using HIP and hipBLAS for GPU info")
103-
add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUBLAS)
103+
add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUDA)
104104
add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h)
105105
set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
106106
target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)

0 commit comments

Comments
 (0)