import {Llama} from "../bindings/Llama.js";
import MissingNodeLlamaError from "./errors/MissingNodeLlamaError.js";
import {GGUFMetadataResponse} from "./ggufParser/GGUFParser.js";
import NotEnoughVRamError from "./errors/ModelScore/NotEnoughVRamError.js";

const PAD_AVAILABLE_VRAM = 1024 ** 2 * 500; // leave 500MB of VRAM unused as a safety margin

export type GGUFInsightsOptions = {
    contextCount?: number,
    nodeLlama?: Llama,
    modelSize?: number
};

export default class GGUFInsights {
    public readonly metadataResponse: GGUFMetadataResponse;
    public readonly options: GGUFInsightsOptions = {};

    public get metadata() {
        return this.metadataResponse.metadata;
    }

    /**
     * The metadata of the architecture-specific section, selected by `general.architecture`
     */
    public get architectureMetadata() {
        return this.metadata[this.metadata.general.architecture];
    }

    /**
     * Size in bytes of the fp16 key and value matrices (the KV cache) at the full context length
     */
    public get kvMatrices() {
        // 2 bytes per fp16 value * 2 matrices (key and value)
        return (
            2 * 2 *
            this.architectureMetadata.context_length *
            this.architectureMetadata.block_count *
            this.architectureMetadata.embedding_length *
            this.architectureMetadata.attention.head_count_kv /
            this.architectureMetadata.attention.head_count
        );
    }
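
    // A worked example of the formula above, using hypothetical 7B Llama-style metadata
    // (illustrative values, not read from a real file):
    //   context_length = 4096, block_count = 32, embedding_length = 4096,
    //   head_count = 32, head_count_kv = 32 (no grouped-query attention)
    //   => 2 * 2 * 4096 * 32 * 4096 * (32 / 32) = 2,147,483,648 bytes = 2GiB
    // With grouped-query attention (head_count_kv = 8) the same model would need
    //   2GiB * (8 / 32) = 512MiB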

    /**
     * Estimated size of the compute graph's overhead and intermediate tensors held in memory
     */
    public get graphSize() {
        // TODO: get this from llama.cpp's graph calculations instead of
        // estimating it as 1/6 * kv_cache_size * num_gqa
        return (
            (this.architectureMetadata.attention.head_count_kv /
                this.architectureMetadata.attention.head_count) * this.kvMatrices / 6
        );
    }
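
    // Continuing the hypothetical example above: with head_count_kv = head_count = 32
    // and a 2GiB KV cache, the estimated graph size is (32 / 32) * 2GiB / 6 ≈ 341MiB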

    /**
     * Estimated total VRAM usage: graph overhead + KV cache + metadata size
     */
    public get VRAMUsage() {
        return this.graphSize + this.kvMatrices + this.metadataResponse.metadataSize;
    }

    protected get _availableVRam() {
        if (!this.options?.nodeLlama) {
            throw new MissingNodeLlamaError("GGUFInsights Calculations");
        }

        return this.options.nodeLlama.getVramState().total - PAD_AVAILABLE_VRAM;
    }

    public constructor(metadataResponse: GGUFMetadataResponse, options: GGUFInsightsOptions = {}) {
        this.options = options;
        this.metadataResponse = metadataResponse;
    }

    /**
     * A score for how well the model fits the current system,
     * measured as the estimated VRAM usage as a fraction of the available VRAM (lower is better)
     */
    public modelScore() {
        const vramScore = this.VRAMUsage / this._availableVRam;

        if (vramScore >= 1) {
            throw new NotEnoughVRamError(this.VRAMUsage, this._availableVRam);
        }

        return vramScore;
    }
}
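
// Usage sketch (hypothetical, for illustration): assumes `metadataResponse` came from
// the GGUF parser and `llama` is an existing Llama bindings instance.
//
//     const insights = new GGUFInsights(metadataResponse, {nodeLlama: llama});
//     console.log(`estimated VRAM usage: ${insights.VRAMUsage} bytes`);
//
//     try {
//         // in [0, 1) when the model fits; lower means more VRAM headroom
//         console.log(`model score: ${insights.modelScore()}`);
//     } catch (err) {
//         // NotEnoughVRamError: the model is estimated not to fit in the available VRAM
//     }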