Skip to content

Commit bcaab4f

Browse files
authored
feat: gguf parser (#168)
1 parent fa6cf2e commit bcaab4f

17 files changed

+1222
-5
lines changed

package-lock.json

Lines changed: 31 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@
113113
"@commitlint/cli": "^17.7.1",
114114
"@commitlint/config-conventional": "^17.7.0",
115115
"@semantic-release/exec": "^6.0.3",
116+
"@types/async-retry": "^1.4.8",
116117
"@types/bytes": "^3.1.4",
117118
"@types/cli-progress": "^3.11.0",
118119
"@types/cross-spawn": "^6.0.2",
@@ -146,6 +147,7 @@
146147
"zx": "^7.2.3"
147148
},
148149
"dependencies": {
150+
"async-retry": "^1.3.3",
149151
"bytes": "^3.1.2",
150152
"chalk": "^5.3.0",
151153
"chmodrp": "^1.0.2",
@@ -155,7 +157,7 @@
155157
"cross-spawn": "^7.0.3",
156158
"env-var": "^7.3.1",
157159
"fs-extra": "^11.2.0",
158-
"lifecycle-utils": "^1.2.2",
160+
"lifecycle-utils": "^1.3.0",
159161
"log-symbols": "^5.1.0",
160162
"node-addon-api": "^7.0.0",
161163
"octokit": "^3.1.0",

src/gguf/GGUFInsights.ts

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import {Llama} from "../bindings/Llama.js";
2+
import MissingNodeLlamaError from "./errors/MissingNodeLlamaError.js";
3+
import {GGUFMetadataResponse} from "./ggufParser/GGUFParser.js";
4+
import NotEnoughVRamError from "./errors/ModelScore/NotEnoughVRamError.js";
5+
6+
/**
 * Safety margin, in bytes, subtracted from the total VRAM reported by the
 * binding when computing how much VRAM is considered available (500 MiB).
 */
const PAD_AVAILABLE_VRAM = 500 * 1024 ** 2;
7+
8+
/**
 * Options accepted by `GGUFInsights`.
 */
export type GGUFInsightsOptions = {
    // NOTE(review): not read by any code visible in this file - confirm intended use.
    contextCount?: number;

    /** Binding instance whose `getVramState()` is queried for the VRAM total. */
    nodeLlama?: Llama;

    // NOTE(review): not read by any code visible in this file - confirm intended use.
    modelSize?: number;
};
13+
14+
/**
 * Derives memory-usage estimates from a parsed GGUF metadata response and
 * compares them against the VRAM reported by the `nodeLlama` binding.
 */
export default class GGUFInsights {
    public readonly metadataResponse: GGUFMetadataResponse;
    public readonly options: GGUFInsightsOptions = {};

    /** The `metadata` section of the parsed metadata response. */
    public get metadata() {
        return this.metadataResponse.metadata;
    }

    /** The metadata entry keyed by the value of `general.architecture`. */
    public get architectureMetadata() {
        return this.metadata[this.metadata.general.architecture];
    }

    /**
     * fp16 k,v matrices
     */
    public get kvMatrices() {
        // 2 bytes each * 2 key and value
        return (
            2 * 2 *
            this.architectureMetadata.context_length *
            this.architectureMetadata.block_count *
            this.architectureMetadata.embedding_length *
            this.architectureMetadata.attention.head_count_kv /
            this.architectureMetadata.attention.head_count
        );
    }

    /**
     * This amount is the overhead + tensors in memory
     */
    public get graphSize() {
        // TODO: get this from the llama.cpp's graph calculations instead of
        // estimating it's 1/6 * kv_cache_size * num_gqa
        // NOTE(review): the ratio used below is head_count_kv / head_count; if
        // `num_gqa` means head_count / head_count_kv this is its inverse - confirm
        // which one is intended before changing the formula.
        return (
            (this.architectureMetadata.attention.head_count_kv /
                this.architectureMetadata.attention.head_count) * this.kvMatrices / 6
        );
    }

    /** Sum of the graph estimate, the k,v matrices and `metadataResponse.metadataSize`. */
    public get VRAMUsage() {
        return this.graphSize + this.kvMatrices + this.metadataResponse.metadataSize;
    }

    /**
     * Total VRAM reported by `nodeLlama.getVramState()` minus the safety padding.
     * The result can be zero or negative when the reported total does not exceed the padding.
     * @throws {MissingNodeLlamaError} when no `nodeLlama` option was provided
     */
    protected get _availableVRam() {
        if (!this.options?.nodeLlama) {
            throw new MissingNodeLlamaError("GGUFInsights Calculations");
        }
        return this.options.nodeLlama.getVramState().total - PAD_AVAILABLE_VRAM;
    }

    /**
     * @param metadataResponse - parsed GGUF metadata to analyse
     * @param options - optional settings; `nodeLlama` is required for `modelScore()`
     */
    public constructor(metadataResponse: GGUFMetadataResponse, options: GGUFInsightsOptions = {}) {
        this.options = options;
        this.metadataResponse = metadataResponse;
    }

    /**
     * The score of the model by how much it's compatible to the current system
     * @returns the ratio `VRAMUsage / available VRAM`, always below 1
     * @throws {MissingNodeLlamaError} when no `nodeLlama` option was provided
     * @throws {NotEnoughVRamError} when the estimated usage does not fit into the available VRAM
     */
    public modelScore() {
        // Read each value exactly once so the numbers reported in the error are
        // the same ones the decision was based on.
        const requiredVRam = this.VRAMUsage;
        const availableVRam = this._availableVRam;
        const vramScore = requiredVRam / availableVRam;

        // A non-positive budget would otherwise produce a negative (or NaN) ratio
        // that slips past the `>= 1` check and looks like a perfect fit.
        if (availableVRam <= 0 || vramScore >= 1) {
            throw new NotEnoughVRamError(requiredVRam, availableVRam);
        }

        return vramScore;
    }
}

src/gguf/GGUFMetadata.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import retry from "async-retry";
2+
import MetadataNotParsedYetError from "./errors/MetadataNotParsedYetError.js";
3+
import GGUFInsights, {GGUFInsightsOptions} from "./GGUFInsights.js";
4+
import GGUFParser, {GGUFMetadataResponse} from "./ggufParser/GGUFParser.js";
5+
import GGUFFetchStream from "./ggufParser/stream/GGUFFetchStream.js";
6+
import GGUFReadStream from "./ggufParser/stream/GGUFReadStream.js";
7+
8+
/**
 * Options accepted by `GGUFMetadata`.
 */
export type GGUFMetadataOptions = {
    /** `"network"` selects the fetch-based stream; any other value selects the read stream. */
    source?: "network" | "local";

    /** `async-retry` options handed to whichever stream is created. */
    retry?: retry.Options;

    /** Passed as the second argument to the `GGUFParser` constructor. */
    ignoreKeys?: string[];

    /** Options handed to `GGUFInsights` when the `insights` getter is read. */
    insights?: GGUFInsightsOptions;
};
14+
15+
/**
 * Parses the metadata of a GGUF file at `path` and exposes the parsed result
 * together with insights computed from it.
 */
export default class GGUFMetadata {
    protected _metadata?: GGUFMetadataResponse;
    public readonly path: string;
    public readonly options: Partial<GGUFMetadataOptions> = {};

    /**
     * The result stored by `parse()`.
     * @throws {MetadataNotParsedYetError} when nothing has been parsed yet
     */
    public get metadata() {
        const parsed = this._metadata;
        if (parsed) {
            return parsed;
        }
        throw new MetadataNotParsedYetError(this.path);
    }

    /** A new `GGUFInsights` built from the parsed metadata on every access. */
    public get insights() {
        const {insights: insightsOptions} = this.options;
        return new GGUFInsights(this.metadata, insightsOptions);
    }

    /**
     * @param path - location of the GGUF file, handed to the stream that is created
     * @param options - optional source, retry, key filtering and insights settings
     */
    public constructor(path: string, options: Partial<GGUFMetadataOptions> = {}) {
        this.options = options;
        this.path = path;
    }

    /**
     * Creates a stream for `path`, parses its metadata and stores the result.
     * @returns the parsed metadata response
     */
    public async parse() {
        const parser = new GGUFParser(this._createStream(), this.options.ignoreKeys);
        const parsed = await parser.parseMetadata();
        this._metadata = parsed;
        return parsed;
    }

    /** Picks the stream implementation according to `options.source`. */
    private _createStream() {
        const streamOptions = {retry: this.options.retry};

        if (this.options.source === "network") {
            return new GGUFFetchStream(this.path, streamOptions);
        }

        // "local" and an unspecified source both use the read stream
        return new GGUFReadStream(this.path, streamOptions);
    }
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/**
 * Error type carrying an "Invalid GGUF magic" message by default.
 */
export default class InvalidGGUFMagicError extends Error {
    /**
     * @param message - error text; defaults to "Invalid GGUF magic"
     */
    public constructor(message = "Invalid GGUF magic") {
        super(message);
        // Without this, logs and stack traces show the generic "Error" name.
        this.name = "InvalidGGUFMagicError";
    }
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/**
 * Error type whose message reports that the metadata of `path` has not been parsed yet.
 */
export default class MetadataNotParsedYetError extends Error {
    /**
     * @param path - path embedded (quoted) in the error message
     */
    public constructor(path: string) {
        super(`Metadata not parsed yet: "${path}"`);
        // Without this, logs and stack traces show the generic "Error" name.
        this.name = "MetadataNotParsedYetError";
    }
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/**
 * Error type whose message reports that the `nodeLlama` option is missing.
 */
export default class MissingNodeLlamaError extends Error {
    /**
     * @param purpose - what the option is needed for; appended to the message
     */
    public constructor(purpose: string) {
        super(`Missing nodeLlama options, this is required for ${purpose}`);
        // Without this, logs and stack traces show the generic "Error" name.
        this.name = "MissingNodeLlamaError";
    }
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import prettyBytes from "pretty-bytes";
2+
3+
/**
 * Error type carrying the required and available VRAM amounts, both as numeric
 * fields and formatted with `prettyBytes` in the message.
 */
export default class NotEnoughVRamError extends Error {
    public readonly requiredVRAM: number;
    public readonly availableVRAM: number;

    /**
     * @param requiredVRAM - amount of VRAM needed, in bytes
     * @param availableVRAM - amount of VRAM available, in bytes
     */
    public constructor(requiredVRAM: number, availableVRAM: number) {
        // Pass the message to `super` so it is already set when the stack trace
        // is captured; assigning `message` afterwards leaves the stack header empty.
        super(`Not enough VRAM, require ${prettyBytes(requiredVRAM)}, but only ${prettyBytes(availableVRAM)} available`);
        this.name = "NotEnoughVRamError";
        this.availableVRAM = availableVRAM;
        this.requiredVRAM = requiredVRAM;
    }
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/**
 * Error type reporting a metadata type identifier that is not supported.
 */
export default class UnsupportedMetadataTypeError extends Error {
    /** The numeric type identifier passed to the constructor. */
    public readonly type: number;

    /**
     * @param type - numeric type identifier; embedded (quoted) in the message
     */
    public constructor(type: number) {
        super(`Unsupported metadata type: "${type}"`);
        // Without this, logs and stack traces show the generic "Error" name.
        this.name = "UnsupportedMetadataTypeError";
        this.type = type;
    }
}

0 commit comments

Comments
 (0)