
Commit 5ca33c7

feat(inspect gpu command): print device names (#198)
* feat(`inspect gpu` command): print device names
* fix: ensure `TokenBias` was created with the current model
* fix: update `ipull` to resolve model download issues
1 parent 7878c8a commit 5ca33c7

File tree

7 files changed: +48 −25 lines


docs/guide/vulkan.md

Lines changed: 2 additions & 0 deletions
@@ -17,9 +17,11 @@ You should see an output like this:
 ```ansi
 Vulkan: available
 
+Vulkan device: Apple M1 Max
 Vulkan used VRAM: 0% (64KB/21.33GB)
 Vulkan free VRAM: 99.99% (21.33GB/21.33GB)
 
+CPU model: Apple M1 Max
 Used RAM: 97.37% (31.16GB/32GB)
 Free RAM: 2.62% (860.72MB/32GB)
 ```
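
For context, the output above comes from the `inspect gpu` CLI command this commit extends; the guide's invocation looks like this (the exact `npx` flags are an assumption from the surrounding guide, not shown in this diff):

```shell
npx --no node-llama-cpp inspect gpu
```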

package-lock.json

Lines changed: 16 additions & 16 deletions
Some generated files are not rendered by default.

package.json

Lines changed: 4 additions & 4 deletions
@@ -137,9 +137,9 @@
   "semantic-release": "^22.0.8",
   "tslib": "^2.6.1",
   "typedoc": "^0.25.3",
-  "typedoc-plugin-markdown": "^4.0.0-next.53",
-  "typedoc-plugin-mdn-links": "^3.1.5",
-  "typedoc-vitepress-theme": "^1.0.0-next.9",
+  "typedoc-plugin-markdown": "^4.0.0-next.55",
+  "typedoc-plugin-mdn-links": "^3.1.19",
+  "typedoc-vitepress-theme": "^1.0.0-next.10",
   "typescript": "^5.2.2",
   "vite-node": "^1.4.0",
   "vitepress": "1.0.0-rc.22",
@@ -157,7 +157,7 @@
   "cross-spawn": "^7.0.3",
   "env-var": "^7.3.1",
   "fs-extra": "^11.2.0",
-  "ipull": "^3.0.8",
+  "ipull": "^3.0.11",
   "is-unicode-supported": "^2.0.0",
   "lifecycle-utils": "^1.4.1",
   "log-symbols": "^5.1.0",

src/cli/commands/inspect/commands/InspectGpuCommand.ts

Lines changed: 14 additions & 0 deletions
@@ -65,6 +65,10 @@ async function logGpuVramUsage(gpu: BuildGpu) {
     });
     const gpuName = getPrettyBuildGpuName(gpu);
     const vramStatus = llama.getVramState();
+    const gpuDeviceNames = llama.getGpuDeviceNames();
+
+    if (gpuDeviceNames.length > 0)
+        console.info(`${chalk.yellow(`${gpuName} device${gpuDeviceNames.length > 1 ? "s" : ""}:`)} ${gpuDeviceNames.join(", ")}`);
 
     console.info(`${chalk.yellow(`${gpuName} used VRAM:`)} ${getPercentageString(vramStatus.used, vramStatus.total)}% ${chalk.gray("(" + bytes(vramStatus.used) + "/" + bytes(vramStatus.total) + ")")}`);
     console.info(`${chalk.yellow(`${gpuName} free VRAM:`)} ${getPercentageString(vramStatus.free, vramStatus.total)}% ${chalk.gray("(" + bytes(vramStatus.free) + "/" + bytes(vramStatus.total) + ")")}`);
@@ -75,6 +79,16 @@ async function logRamUsage() {
     const totalMemory = os.totalmem();
     const freeMemory = os.freemem();
     const usedMemory = totalMemory - freeMemory;
+    const cpuDeviceNames = Array.from(
+        new Set(
+            os.cpus()
+                .map((cpu) => (cpu.model?.trim?.() ?? ""))
+                .filter((deviceName) => deviceName.length > 0)
+        )
+    );
+
+    if (cpuDeviceNames.length > 0)
+        console.info(`${chalk.yellow("CPU model" + (cpuDeviceNames.length > 1 ? "s" : "") + ":")} ${cpuDeviceNames.join(", ")}`);
 
     console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.gray("(" + bytes(usedMemory) + "/" + bytes(totalMemory) + ")")}`);
     console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.gray("(" + bytes(freeMemory) + "/" + bytes(totalMemory) + ")")}`);
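
For reference, the `llama.getGpuDeviceNames()` call used above can also be exercised directly; a minimal sketch, assuming the package's public `getLlama` entry point (everything here other than `getGpuDeviceNames()` is an assumption, not part of this diff):

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

// Prints the GPU device names llama.cpp resolved for the active build,
// e.g. ["Apple M1 Max"] in the docs example above.
console.info(llama.getGpuDeviceNames());
```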

src/evaluator/LlamaContext/LlamaContext.ts

Lines changed: 8 additions & 2 deletions
@@ -932,7 +932,7 @@ export class LlamaContextSequence {
         if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
             throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
 
-        const {tokenBiasKeys, tokenBiasValues} = getTokenBiasesForAddon(tokenBias);
+        const {tokenBiasKeys, tokenBiasValues} = getTokenBiasesForAddon(tokenBias, this.model);
 
         return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
             temperature,
@@ -1108,7 +1108,7 @@ type CurrentBatchItem = {
     processAmount: number
 };
 
-function getTokenBiasesForAddon(tokenBias?: TokenBias | (() => TokenBias)) {
+function getTokenBiasesForAddon(tokenBias: undefined | TokenBias | (() => TokenBias), currentModel: LlamaModel) {
     if (tokenBias == null)
         return {
             tokenBiasKeys: undefined,
@@ -1118,6 +1118,12 @@ function getTokenBiasesForAddon(tokenBias?: TokenBias | (() => TokenBias)) {
     if (tokenBias instanceof Function)
         tokenBias = tokenBias();
 
+    if (tokenBias._model !== currentModel)
+        throw new Error(
+            "This TokenBias instance was created with a different model than the one used by this context. " +
+            "Make sure you use the model instance of the context sequence for the TokenBias you use it with."
+        );
+
     const tokenBiasKeys: Token[] = [];
     const tokenBiasValues: number[] = [];
 
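
To make the enforced invariant concrete, here is a minimal usage sketch, assuming the library's public `getLlama`/`loadModel` flow and a hypothetical model path; only the `new TokenBias(model)` constructor, shown in the TokenBias.ts diff below, is taken from this commit:

```ts
import {getLlama, TokenBias} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "path/to/model.gguf"}); // hypothetical path
const context = await model.createContext();
const sequence = context.getSequence();

// Correct usage: the TokenBias is constructed with the same model instance
// that backs this context sequence, so the new check passes when the bias
// is later passed through the sequence's evaluation options.
const tokenBias = new TokenBias(model);

// Constructing the bias with a different LlamaModel instance and using it
// with this sequence would now throw the error added in this commit.
```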

src/evaluator/LlamaGrammar.ts

Lines changed: 3 additions & 2 deletions
@@ -32,8 +32,9 @@ export class LlamaGrammar {
 
     /**
      * > GBNF files are supported.
-     * > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
-     * > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
+     * > More info here: [
+     * github:ggerganov/llama.cpp:grammars/README.md
+     * ](https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
      * @param options
      */
     public constructor({

src/evaluator/TokenBias.ts

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ import {tokenizeInput} from "../utils/tokenizeInput.js";
 import {LlamaModel} from "./LlamaModel.js";
 
 export class TokenBias {
-    /** @internal */ private readonly _model: LlamaModel;
+    /** @internal */ public readonly _model: LlamaModel;
     /** @internal */ public readonly _biases = new Map<Token, number>();
 
     public constructor(model: LlamaModel) {
