Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8,059 changes: 4,117 additions & 3,942 deletions package-lock.json

Large diffs are not rendered by default.

60 changes: 30 additions & 30 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -127,74 +127,74 @@
},
"homepage": "https://node-llama-cpp.withcat.ai",
"devDependencies": {
"@commitlint/cli": "^19.6.0",
"@commitlint/cli": "^19.6.1",
"@commitlint/config-conventional": "^19.6.0",
"@eslint/compat": "^1.2.3",
"@fontsource/inter": "^5.1.0",
"@nolebase/vitepress-plugin-git-changelog": "^2.11.1",
"@nolebase/vitepress-plugin-og-image": "^2.11.1",
"@eslint/compat": "^1.2.4",
"@fontsource/inter": "^5.1.1",
"@nolebase/vitepress-plugin-git-changelog": "^2.12.0",
"@nolebase/vitepress-plugin-og-image": "^2.12.0",
"@resvg/resvg-js": "^2.6.2",
"@semantic-release/exec": "^6.0.3",
"@semantic-release/github": "11.0.1",
"@semantic-release/npm": "12.0.1",
"@shikijs/vitepress-twoslash": "^1.24.0",
"@stylistic/eslint-plugin": "^2.11.0",
"@shikijs/vitepress-twoslash": "^1.26.1",
"@stylistic/eslint-plugin": "^2.12.1",
"@types/async-retry": "^1.4.9",
"@types/bytes": "^3.1.4",
"@types/cross-spawn": "^6.0.2",
"@types/bytes": "^3.1.5",
"@types/cross-spawn": "^6.0.6",
"@types/fs-extra": "^11.0.4",
"@types/node": "^22.10.1",
"@types/node": "^22.10.5",
"@types/proper-lockfile": "^4.1.4",
"@types/semver": "^7.5.8",
"@types/validate-npm-package-name": "^4.0.2",
"@types/which": "^3.0.4",
"@types/yargs": "^17.0.33",
"@vitest/coverage-v8": "^2.1.6",
"@vitest/ui": "^2.1.6",
"electron": "^33.2.1",
"eslint": "^9.16.0",
"eslint-import-resolver-typescript": "^3.6.3",
"@vitest/coverage-v8": "^2.1.8",
"@vitest/ui": "^2.1.8",
"electron": "^33.3.1",
"eslint": "^9.17.0",
"eslint-import-resolver-typescript": "^3.7.0",
"eslint-plugin-import": "^2.31.0",
"eslint-plugin-jsdoc": "^50.6.0",
"eslint-plugin-n": "^17.14.0",
"eslint-plugin-jsdoc": "^50.6.1",
"eslint-plugin-n": "^17.15.1",
"feed": "^4.2.2",
"husky": "^9.1.7",
"rehype": "^13.0.1",
"rehype": "^13.0.2",
"rimraf": "^6.0.1",
"semantic-release": "^24.2.0",
"semantic-release": "^24.2.1",
"sharp": "^0.33.5",
"tslib": "^2.8.1",
"typedoc": "^0.27.2",
"typedoc-plugin-markdown": "^4.3.1",
"typedoc-plugin-mdn-links": "^4.0.3",
"typedoc-vitepress-theme": "^1.1.0",
"typedoc": "^0.27.6",
"typedoc-plugin-markdown": "^4.4.1",
"typedoc-plugin-mdn-links": "^4.0.7",
"typedoc-vitepress-theme": "^1.1.1",
"typescript": "^5.7.2",
"typescript-eslint": "^8.16.0",
"vite-node": "^2.1.6",
"typescript-eslint": "^8.19.1",
"vite-node": "^2.1.8",
"vitepress": "^1.5.0",
"vitest": "^2.1.6",
"zx": "^8.2.4"
"vitest": "^2.1.8",
"zx": "^8.3.0"
},
"dependencies": {
"@huggingface/jinja": "^0.3.2",
"async-retry": "^1.3.3",
"bytes": "^3.1.2",
"chalk": "^5.3.0",
"chalk": "^5.4.1",
"chmodrp": "^1.0.2",
"cmake-js": "^7.3.0",
"cross-env": "^7.0.3",
"cross-spawn": "^7.0.6",
"env-var": "^7.5.0",
"filenamify": "^6.0.0",
"fs-extra": "^11.2.0",
"ignore": "^5.3.2",
"ignore": "^7.0.0",
"ipull": "^3.9.2",
"is-unicode-supported": "^2.1.0",
"lifecycle-utils": "^2.0.0",
"log-symbols": "^7.0.0",
"nanoid": "^5.0.9",
"node-addon-api": "^8.3.0",
"octokit": "^4.0.2",
"octokit": "^4.0.3",
"ora": "^8.1.1",
"pretty-ms": "^9.2.0",
"proper-lockfile": "^4.1.2",
Expand Down
10 changes: 5 additions & 5 deletions src/cli/commands/DebugCommand.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os from "os";
import {CommandModule} from "yargs";
import bytes from "bytes";
import chalk from "chalk";
import {getLlama} from "../../bindings/getLlama.js";
import {prettyPrintObject} from "../../utils/prettyPrintObject.js";
import {logUsedGpuTypeOption} from "../utils/logUsedGpuTypeOption.js";
import {toBytes} from "../utils/toBytes.js";

const debugFunctions = ["vram", "cmakeOptions"] as const;
type DebugCommand = {
Expand Down Expand Up @@ -51,11 +51,11 @@ async function DebugVramFunction() {
logUsedGpuTypeOption(llama.gpu);
console.info();

console.info(`${chalk.yellow("Used VRAM:")} ${getPercentageString(vramStatus.used, vramStatus.total)}% ${chalk.gray("(" + bytes(vramStatus.used) + "/" + bytes(vramStatus.total) + ")")}`);
console.info(`${chalk.yellow("Free VRAM:")} ${getPercentageString(vramStatus.free, vramStatus.total)}% ${chalk.gray("(" + bytes(vramStatus.free) + "/" + bytes(vramStatus.total) + ")")}`);
console.info(`${chalk.yellow("Used VRAM:")} ${getPercentageString(vramStatus.used, vramStatus.total)}% ${chalk.gray("(" + toBytes(vramStatus.used) + "/" + toBytes(vramStatus.total) + ")")}`);
console.info(`${chalk.yellow("Free VRAM:")} ${getPercentageString(vramStatus.free, vramStatus.total)}% ${chalk.gray("(" + toBytes(vramStatus.free) + "/" + toBytes(vramStatus.total) + ")")}`);
console.info();
console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.gray("(" + bytes(usedMemory) + "/" + bytes(totalMemory) + ")")}`);
console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.gray("(" + bytes(freeMemory) + "/" + bytes(totalMemory) + ")")}`);
console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.gray("(" + toBytes(usedMemory) + "/" + toBytes(totalMemory) + ")")}`);
console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.gray("(" + toBytes(freeMemory) + "/" + toBytes(totalMemory) + ")")}`);
}

async function DebugCmakeOptionsFunction() {
Expand Down
10 changes: 5 additions & 5 deletions src/cli/commands/inspect/commands/InspectEstimateCommand.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import {CommandModule} from "yargs";
import chalk from "chalk";
import bytes from "bytes";
import {readGgufFileInfo} from "../../../../gguf/readGgufFileInfo.js";
import {resolveHeaderFlag} from "../../../utils/resolveHeaderFlag.js";
import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
Expand All @@ -20,6 +19,7 @@ import {getPrettyBuildGpuName} from "../../../../bindings/consts.js";
import withOra from "../../../../utils/withOra.js";
import {resolveModelDestination} from "../../../../utils/resolveModelDestination.js";
import {printModelDestination} from "../../../utils/printModelDestination.js";
import {toBytes} from "../../../utils/toBytes.js";

type InspectEstimateCommand = {
modelPath: string,
Expand Down Expand Up @@ -198,7 +198,7 @@ export const InspectEstimateCommand: CommandModule<object, InspectEstimateComman
value: getPrettyBuildGpuName(llama.gpu)
}, {
title: "VRAM",
value: bytes(vramState.total)
value: toBytes(vramState.total)
}, {
title: "Name",
value: toOneLine(deviceNames.join(", "))
Expand All @@ -219,7 +219,7 @@ export const InspectEstimateCommand: CommandModule<object, InspectEstimateComman
)
}, {
title: "Size",
value: bytes(ggufInsights.modelSize)
value: toBytes(ggufInsights.modelSize)
}, {
show: ggufInsights.trainContextSize != null,
title: "Train context size",
Expand Down Expand Up @@ -262,10 +262,10 @@ function logCompatibilityScore(
}, {
show: llama.gpu !== false,
title: "VRAM usage",
value: () => bytes(compatibilityScore.resolvedValues.totalVramUsage)
value: () => toBytes(compatibilityScore.resolvedValues.totalVramUsage)
}, {
title: "RAM usage",
value: () => bytes(compatibilityScore.resolvedValues.totalRamUsage)
value: () => toBytes(compatibilityScore.resolvedValues.totalRamUsage)
}, {
show: flashAttention,
title: "Flash attention",
Expand Down
6 changes: 3 additions & 3 deletions src/cli/commands/inspect/commands/InspectGgufCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import path from "path";
import process from "process";
import {CommandModule} from "yargs";
import chalk from "chalk";
import bytes from "bytes";
import fs from "fs-extra";
import {readGgufFileInfo} from "../../../../gguf/readGgufFileInfo.js";
import {prettyPrintObject, PrettyPrintObjectOptions} from "../../../../utils/prettyPrintObject.js";
Expand All @@ -15,6 +14,7 @@ import {resolveModelDestination} from "../../../../utils/resolveModelDestination
import {printModelDestination} from "../../../utils/printModelDestination.js";
import {getGgufMetadataKeyValue} from "../../../../gguf/utils/getGgufMetadataKeyValue.js";
import {GgufTensorInfo} from "../../../../gguf/types/GgufTensorInfoTypes.js";
import {toBytes} from "../../../utils/toBytes.js";

type InspectGgufCommand = {
modelPath: string,
Expand Down Expand Up @@ -206,8 +206,8 @@ export const InspectGgufCommand: CommandModule<object, InspectGgufCommand> = {

console.info(`${chalk.yellow("GGUF version:")} ${parsedMetadata.version}`);
console.info(`${chalk.yellow("Tensor count:")} ${parsedMetadata.totalTensorCount.toLocaleString("en-US", numberLocaleFormattingOptions)}`);
console.info(`${chalk.yellow("Metadata size:")} ${bytes(parsedMetadata.totalMetadataSize)}`);
console.info(`${chalk.yellow("Tensor info size:")} ${bytes(parsedMetadata.totalTensorInfoSize!)}`);
console.info(`${chalk.yellow("Metadata size:")} ${toBytes(parsedMetadata.totalMetadataSize)}`);
console.info(`${chalk.yellow("Tensor info size:")} ${toBytes(parsedMetadata.totalTensorInfoSize!)}`);
console.info(`${chalk.yellow("File type:")} ${fileTypeName ?? ""} ${chalk.white(`(${parsedMetadata.metadata.general?.file_type})`)}`);
console.info(`${chalk.yellow("Metadata:")} ${prettyPrintObject(parsedMetadata.metadata, undefined, metadataPrettyPrintOptions)}`);
console.info(`${chalk.yellow("Tensor info:")} ${prettyPrintObject(parsedMetadata.fullTensorInfo, undefined, tensorInfoPrettyPrintOptions)}`);
Expand Down
16 changes: 8 additions & 8 deletions src/cli/commands/inspect/commands/InspectGpuCommand.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os from "os";
import {CommandModule} from "yargs";
import bytes from "bytes";
import chalk from "chalk";
import {getLlamaForOptions} from "../../../../bindings/getLlama.js";
import {detectAvailableComputeLayers} from "../../../../bindings/utils/detectAvailableComputeLayers.js";
Expand All @@ -14,6 +13,7 @@ import {Llama} from "../../../../bindings/Llama.js";
import {getPlatformInfo} from "../../../../bindings/utils/getPlatformInfo.js";
import {getLinuxDistroInfo} from "../../../../bindings/utils/getLinuxDistroInfo.js";
import {isRunningUnderRosetta} from "../../../utils/isRunningUnderRosetta.js";
import {toBytes} from "../../../utils/toBytes.js";

type InspectGpuCommand = {
// no options for now
Expand Down Expand Up @@ -189,11 +189,11 @@ async function logGpuVramUsage(gpu: BuildGpu, llama: Llama) {
if (gpuDeviceNames.length > 0)
console.info(`${chalk.yellow(`${gpuName} device${gpuDeviceNames.length > 1 ? "s" : ""}:`)} ${gpuDeviceNames.join(", ")}`);

console.info(`${chalk.yellow(`${gpuName} used VRAM:`)} ${getPercentageString(vramState.used, vramState.total)}% ${chalk.gray("(" + bytes(vramState.used) + "/" + bytes(vramState.total) + ")")}`);
console.info(`${chalk.yellow(`${gpuName} free VRAM:`)} ${getPercentageString(vramState.free, vramState.total)}% ${chalk.gray("(" + bytes(vramState.free) + "/" + bytes(vramState.total) + ")")}`);
console.info(`${chalk.yellow(`${gpuName} used VRAM:`)} ${getPercentageString(vramState.used, vramState.total)}% ${chalk.gray("(" + toBytes(vramState.used) + "/" + toBytes(vramState.total) + ")")}`);
console.info(`${chalk.yellow(`${gpuName} free VRAM:`)} ${getPercentageString(vramState.free, vramState.total)}% ${chalk.gray("(" + toBytes(vramState.free) + "/" + toBytes(vramState.total) + ")")}`);

if (vramState.unifiedSize > 0)
console.info(`${chalk.yellow(`${gpuName} unified memory:`)} ${bytes(vramState.unifiedSize)} ${chalk.gray("(" + getPercentageString(vramState.unifiedSize, vramState.total) + "%)")}`);
console.info(`${chalk.yellow(`${gpuName} unified memory:`)} ${toBytes(vramState.unifiedSize)} ${chalk.gray("(" + getPercentageString(vramState.unifiedSize, vramState.total) + "%)")}`);
} catch (err) {}
}

Expand All @@ -215,15 +215,15 @@ async function logRamUsage(cpuMathCores?: number) {
if (cpuMathCores != null)
console.info(`${chalk.yellow("Math cores:")} ${cpuMathCores}`);

console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.gray("(" + bytes(usedMemory) + "/" + bytes(totalMemory) + ")")}`);
console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.gray("(" + bytes(freeMemory) + "/" + bytes(totalMemory) + ")")}`);
console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.gray("(" + toBytes(usedMemory) + "/" + toBytes(totalMemory) + ")")}`);
console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.gray("(" + toBytes(freeMemory) + "/" + toBytes(totalMemory) + ")")}`);
}

async function logSwapUsage(llama: Llama) {
const swapState = await llama.getSwapState();

console.info(`${chalk.yellow("Used swap:")} ${getPercentageString(swapState.used, swapState.allocated)}% ${chalk.gray("(" + bytes(swapState.used) + "/" + bytes(swapState.allocated) + ")")}`);
console.info(`${chalk.yellow("Max swap size:")} ${swapState.maxSize === Infinity ? "dynamic" : bytes(swapState.maxSize)}`);
console.info(`${chalk.yellow("Used swap:")} ${getPercentageString(swapState.used, swapState.allocated)}% ${chalk.gray("(" + toBytes(swapState.used) + "/" + toBytes(swapState.allocated) + ")")}`);
console.info(`${chalk.yellow("Max swap size:")} ${swapState.maxSize === Infinity ? "dynamic" : toBytes(swapState.maxSize)}`);
}

function getPercentageString(amount: number, total: number) {
Expand Down
30 changes: 15 additions & 15 deletions src/cli/commands/inspect/commands/InspectMeasureCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {fork} from "node:child_process";
import os from "os";
import {CommandModule} from "yargs";
import chalk from "chalk";
import bytes from "bytes";
import stripAnsi from "strip-ansi";
import {readGgufFileInfo} from "../../../../gguf/readGgufFileInfo.js";
import {resolveCommandGgufPath} from "../../../utils/resolveCommandGgufPath.js";
Expand All @@ -21,6 +20,7 @@ import {getReadablePath} from "../../../utils/getReadablePath.js";
import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
import {documentationPageUrls} from "../../../../config.js";
import {Llama} from "../../../../bindings/Llama.js";
import {toBytes} from "../../../utils/toBytes.js";

type InspectMeasureCommand = {
modelPath?: string,
Expand Down Expand Up @@ -271,13 +271,13 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
});
const modelVramEstimation = modelResourceEstimation.gpuVram;
const modelVramEstimationDiffBytes = (modelVramEstimation < result.modelVramUsage ? "-" : "") +
bytes(Math.abs(result.modelVramUsage - modelVramEstimation));
toBytes(Math.abs(result.modelVramUsage - modelVramEstimation));
const modelVramEstimationDiffText = modelVramEstimationDiffBytes.padEnd(9, " ") + " " +
padStartAnsi("(" + renderDiffPercentageWithColors(((modelVramEstimation / result.modelVramUsage) - 1) * 100) + ")", 9);

const modelRamEstimation = modelResourceEstimation.cpuRam;
const modelRamEstimationDiffBytes = (modelRamEstimation < result.modelRamUsage ? "-" : "") +
bytes(Math.abs(result.modelRamUsage - modelRamEstimation));
toBytes(Math.abs(result.modelRamUsage - modelRamEstimation));
const modelRamEstimationDiffText = modelRamEstimationDiffBytes.padEnd(9, " ") + " " +
padStartAnsi("(" + renderDiffPercentageWithColors(((modelRamEstimation / result.modelRamUsage) - 1) * 100) + ")", 9);

Expand All @@ -294,7 +294,7 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
? undefined
: (
(contextVramEstimation < result.contextVramUsage ? "-" : "") +
bytes(Math.abs(result.contextVramUsage - contextVramEstimation))
toBytes(Math.abs(result.contextVramUsage - contextVramEstimation))
);
const contextVramEstimationDiffText = (
contextVramEstimation == null || contextVramEstimationDiffBytes == null || result.contextVramUsage == null
Expand All @@ -310,7 +310,7 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
? undefined
: (
(contextRamEstimation < result.contextRamUsage ? "-" : "") +
bytes(Math.abs(result.contextRamUsage - contextRamEstimation))
toBytes(Math.abs(result.contextRamUsage - contextRamEstimation))
);
const contextRamEstimationDiffText = (
contextRamEstimation == null || contextRamEstimationDiffBytes == null || result.contextRamUsage == null
Expand All @@ -331,33 +331,33 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
? String(previousContextSizeCheck)
: undefined,

estimatedModelVram: bytes(modelVramEstimation),
actualModelVram: bytes(result.modelVramUsage),
estimatedModelVram: toBytes(modelVramEstimation),
actualModelVram: toBytes(result.modelVramUsage),
modelVramEstimationDiff: modelVramEstimationDiffText,

estimatedModelRam: bytes(modelRamEstimation),
actualModelRam: bytes(result.modelRamUsage),
estimatedModelRam: toBytes(modelRamEstimation),
actualModelRam: toBytes(result.modelRamUsage),
modelRamEstimationDiff: modelRamEstimationDiffText,

estimatedContextVram: contextVramEstimation == null
? undefined
: bytes(contextVramEstimation),
: toBytes(contextVramEstimation),
actualContextVram: result.contextVramUsage == null
? undefined
: bytes(result.contextVramUsage),
: toBytes(result.contextVramUsage),
contextVramEstimationDiff: contextVramEstimationDiffText,
totalVramUsage: ((result.totalVramUsage / totalVram) * 100).toFixed(2).padStart(5, "0") + "% " +
chalk.gray("(" + bytes(result.totalVramUsage) + "/" + bytes(totalVram) + ")"),
chalk.gray("(" + toBytes(result.totalVramUsage) + "/" + toBytes(totalVram) + ")"),

estimatedContextRam: contextRamEstimation == null
? undefined
: bytes(contextRamEstimation),
: toBytes(contextRamEstimation),
actualContextRam: result.contextRamUsage == null
? undefined
: bytes(result.contextRamUsage),
: toBytes(result.contextRamUsage),
contextRamEstimationDiff: contextRamEstimationDiffText,
totalRamUsage: ((result.totalRamUsage / totalRam) * 100).toFixed(2).padStart(5, "0") + "% " +
chalk.gray("(" + bytes(result.totalRamUsage) + "/" + bytes(totalRam) + ")")
chalk.gray("(" + toBytes(result.totalRamUsage) + "/" + toBytes(totalRam) + ")")
});
}
}
Expand Down
Loading
Loading