@@ -76,13 +76,6 @@ export type LlamaModelOptions = {
      */
     topP?: number,
 
-    /**
-     * use fp16 for KV cache
-     * @deprecated use the `f16Kv` option on `LlamaContext` instead
-     * @hidden
-     */
-    f16Kv?: boolean,
-
     /**
      * the llama_eval() call computes all logits, not just the last one
      * @deprecated use the `logitsAll` option on `LlamaContext` instead
@@ -116,7 +109,6 @@ export class LlamaModel {
         seed: LlamaModelOptions["seed"],
         contextSize: LlamaModelOptions["contextSize"],
         batchSize: LlamaModelOptions["batchSize"],
-        f16Kv: LlamaModelOptions["f16Kv"],
         logitsAll: LlamaModelOptions["logitsAll"],
         embedding: LlamaModelOptions["embedding"],
         threads: LlamaModelOptions["threads"]
@@ -160,7 +152,6 @@ export class LlamaModel {
      * Set to `1` to disable.
      *
      * Only relevant when `temperature` is set to a value greater than `0`.
-     * @param {boolean} [options.f16Kv] - use fp16 for KV cache
      * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
      * @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
      * @param {boolean} [options.useMmap] - use mmap if possible
@@ -169,7 +160,7 @@ export class LlamaModel {
      */
     public constructor({
         modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers,
-        threads = 6, temperature = 0, topK = 40, topP = 0.95, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding
+        threads = 6, temperature = 0, topK = 40, topP = 0.95, logitsAll, vocabOnly, useMmap, useMlock, embedding
     }: LlamaModelOptions) {
         this._model = new LLAMAModel(path.resolve(process.cwd(), modelPath), removeNullFields({
             gpuLayers,
@@ -182,7 +173,6 @@ export class LlamaModel {
             seed,
             contextSize,
             batchSize,
-            f16Kv,
             logitsAll,
             embedding,
             threads
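
For callers that were passing `f16Kv` to `LlamaModel`, the deprecation notes removed above point to the `f16Kv` option on `LlamaContext` instead. A minimal migration sketch, assuming the `LlamaContext` options shape implied by those deprecation notes (the model path is a placeholder):

```ts
import {LlamaModel, LlamaContext} from "node-llama-cpp";

// Before this change: new LlamaModel({modelPath, f16Kv: true})
// After: f16Kv is no longer a LlamaModel option; set it on the context instead.
const model = new LlamaModel({
    modelPath: "path/to/model.gguf" // placeholder path
});

const context = new LlamaContext({
    model,
    f16Kv: true // use fp16 for the KV cache (assumed per the deprecation note)
});
```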