Skip to content

Commit 4b7ef5b

Browse files
authored
fix: improve model downloader CI logs (#329)
* fix: improve model downloader CI logs * fix: `CodeGemma` adaptations
1 parent ebc4e83 commit 4b7ef5b

File tree

6 files changed

+59
-11
lines changed

6 files changed

+59
-11
lines changed

package-lock.json

Lines changed: 20 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@
181181
"filenamify": "^6.0.0",
182182
"fs-extra": "^11.2.0",
183183
"ignore": "^5.3.2",
184-
"ipull": "^3.6.2",
184+
"ipull": "^3.7.2",
185185
"is-unicode-supported": "^2.1.0",
186186
"lifecycle-utils": "^1.7.0",
187187
"log-symbols": "^7.0.0",

src/cli/recommendedModels.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,8 @@ export const recommendedModels: ModelRecommendation[] = [{
158158
name: "Mixtral 8x7B MoE",
159159
abilities: ["chat", "complete"],
160160
description: "Mixtral models were created by Mistral AI and are general purpose models that utilize a Mixture of Experts architecture.\n" +
161-
"Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to improve the overall performance on complex tasks.\n"
162-
+ "This model includes 8 expert models, each with 7 billion parameters.",
161+
"Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to improve the overall performance on complex tasks.\n" +
162+
"This model includes 8 expert models, each with 7 billion parameters.",
163163

164164
fileOptions: [{
165165
huggingFace: {
@@ -178,7 +178,7 @@ export const recommendedModels: ModelRecommendation[] = [{
178178
name: "Mistral 7B Instruct v0.2",
179179
abilities: ["chat", "complete"],
180180
description: "Mistral models were created by Mistral AI and are general purpose models.\n" +
181-
+ "This is the 7 billion parameters version of the model.",
181+
"This is the 7 billion parameters version of the model.",
182182

183183
fileOptions: [{
184184
huggingFace: {
@@ -518,6 +518,7 @@ export const recommendedModels: ModelRecommendation[] = [{
518518
abilities: ["code", "complete", "infill"],
519519
description: "CodeGemma models were created by Google and are optimized for code completion, code generation, " +
520520
"natural language understanding, mathematical reasoning, and instruction following.\n" +
521+
"This model is not suited for chat.\n" +
521522
"This is the 2 billion parameters version of the model.\n",
522523

523524
fileOptions: [{
@@ -556,6 +557,7 @@ export const recommendedModels: ModelRecommendation[] = [{
556557
abilities: ["code", "complete", "infill"],
557558
description: "CodeGemma models were created by Google and are optimized for code completion, code generation, " +
558559
"natural language understanding, mathematical reasoning, and instruction following.\n" +
560+
"This model is not suited for chat.\n" +
559561
"This is the 7 billion parameters version of the model.\n",
560562

561563
fileOptions: [{

src/evaluator/LlamaCompletion.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ export class LlamaCompletion {
265265
const bosToken = this._sequence.model.tokens.bos;
266266
const shouldPrependBosToken = this._sequence.model.tokens.shouldPrependBosToken;
267267

268+
const extraEosTokens = getExtraCompletionEosTokens(this._sequence.model);
269+
268270
async function fitInputIntoContext({
269271
maxTokens, tokens
270272
}: {
@@ -348,7 +350,8 @@ export class LlamaCompletion {
348350
tokens: [...resolvedInput, ...res, ...pendingTokens]
349351
})
350352
};
351-
}
353+
},
354+
extraEosTokens
352355
});
353356
});
354357
}
@@ -840,6 +843,26 @@ async function resolveContextShiftSize(
840843
return defaultContextShiftSize(sequence);
841844
}
842845

846+
function getExtraCompletionEosTokens(model: LlamaModel) {
847+
const extraEosTokens = new Set<Token>();
848+
849+
if (model.fileInfo.metadata?.general?.architecture === GgufArchitectureType.gemma ||
850+
model.fileInfo.metadata?.general?.architecture === GgufArchitectureType.gemma2
851+
) {
852+
for (const token of model.iterateAllTokens()) {
853+
const tokenText = model.detokenize([token], true);
854+
if (tokenText === "<|file_separator|>" || tokenText === "<|fim_prefix|>") {
855+
extraEosTokens.add(token);
856+
857+
if (extraEosTokens.size === 2)
858+
break;
859+
}
860+
}
861+
}
862+
863+
return extraEosTokens;
864+
}
865+
843866
function getExtraInfillEosTokens(model: LlamaModel) {
844867
const extraEosTokens = new Set<Token>();
845868

src/utils/createModelDownloader.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import fs from "fs-extra";
55
import {normalizeGgufDownloadUrl} from "../gguf/utils/normalizeGgufDownloadUrl.js";
66
import {createSplitPartFilename, resolveSplitGgufParts} from "../gguf/utils/resolveSplitGgufParts.js";
77
import {getFilenameForBinarySplitGgufPartUrls, resolveBinarySplitGgufPartUrls} from "../gguf/utils/resolveBinarySplitGgufPartUrls.js";
8-
import {cliModelsDirectory} from "../config.js";
8+
import {cliModelsDirectory, isCI} from "../config.js";
99
import {safeEventCallback} from "./safeEventCallback.js";
1010
import {ModelFileAccessTokens, resolveModelFileAccessTokensTryHeaders} from "./modelFileAccesTokens.js";
1111
import {pushAll} from "./pushAll.js";
@@ -312,6 +312,7 @@ export class ModelDownloader {
312312
directory: this._dirPath,
313313
fileName: this._fileName ?? getFilenameForBinarySplitGgufPartUrls(binarySplitPartUrls),
314314
cliProgress: this._showCliProgress,
315+
cliStyle: isCI ? "ci" : "fancy",
315316
headers: this._headers ?? {},
316317
tryHeaders: this._tryHeaders.slice(),
317318
skipExisting: this._skipExisting
@@ -335,6 +336,7 @@ export class ModelDownloader {
335336
directory: this._dirPath,
336337
fileName: this._fileName ?? undefined,
337338
cliProgress: this._showCliProgress,
339+
cliStyle: isCI ? "ci" : "fancy",
338340
headers: this._headers ?? {},
339341
tryHeaders: this._tryHeaders.slice(),
340342
skipExisting: this._skipExisting
@@ -364,6 +366,7 @@ export class ModelDownloader {
364366
this._downloader = await downloadSequence(
365367
{
366368
cliProgress: this._showCliProgress,
369+
cliStyle: isCI ? "ci" : "fancy",
367370
parallelDownloads: this._parallelDownloads
368371
},
369372
...partDownloads
@@ -541,6 +544,7 @@ export class CombinedModelDownloader {
541544
this._downloader = await downloadSequence(
542545
{
543546
cliProgress: this._showCliProgress,
547+
cliStyle: isCI ? "ci" : "fancy",
544548
parallelDownloads: this._parallelDownloads
545549
},
546550
...(await Promise.all(this._downloaders)).flatMap((downloader) => downloader._specificFileDownloaders)

test/utils/modelFiles.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import fs from "fs-extra";
55
import chalk from "chalk";
66
import withStatusLogs from "../../src/utils/withStatusLogs.js";
77
import {withLockfile} from "../../src/utils/withLockfile.js";
8+
import {isCI} from "../../src/config.js";
89

910
const __dirname = path.dirname(fileURLToPath(import.meta.url));
1011

@@ -44,7 +45,8 @@ export async function getModelFile(modelName: keyof typeof supportedModels) {
4445
url: modelUrl,
4546
directory: path.dirname(modelFilePath),
4647
fileName: path.basename(modelFilePath),
47-
cliProgress: true
48+
cliProgress: true,
49+
cliStyle: isCI ? "ci" : "fancy"
4850
});
4951
await downloader.download();
5052

@@ -89,6 +91,7 @@ export async function downloadAllModels() {
8991
console.info(`Downloading ${pendingDownloads.length} model${pendingDownloads.length === 1 ? "" : "s"}`);
9092
const downloader = await downloadSequence({
9193
cliProgress: true,
94+
cliStyle: isCI ? "ci" : "fancy",
9295
parallelDownloads: 4
9396
}, ...pendingDownloads);
9497
await downloader.download();

0 commit comments

Comments
 (0)