From 31c47f11f632c15fcdefda01e7eb85b6c1378657 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Fri, 1 Aug 2025 19:10:41 +0300 Subject: [PATCH 01/10] test: fix tests --- src/bindings/types.ts | 8 ++++++-- test/modelDependent/bgeReranker/rank.test.ts | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/bindings/types.ts b/src/bindings/types.ts index 5c6af332..3c3a2d66 100644 --- a/src/bindings/types.ts +++ b/src/bindings/types.ts @@ -103,13 +103,17 @@ export enum LlamaVocabularyType { bpe = "bpe", wpm = "wpm", ugm = "ugm", - rwkv = "rwkv" + rwkv = "rwkv", + plamo2 = "plamo2" } export const LlamaVocabularyTypeValues = Object.freeze([ LlamaVocabularyType.none, LlamaVocabularyType.spm, LlamaVocabularyType.bpe, - LlamaVocabularyType.wpm + LlamaVocabularyType.wpm, + LlamaVocabularyType.ugm, + LlamaVocabularyType.rwkv, + LlamaVocabularyType.plamo2 ] as const); /** diff --git a/test/modelDependent/bgeReranker/rank.test.ts b/test/modelDependent/bgeReranker/rank.test.ts index 37a4bd4e..af7999fd 100644 --- a/test/modelDependent/bgeReranker/rank.test.ts +++ b/test/modelDependent/bgeReranker/rank.test.ts @@ -5,7 +5,7 @@ import {getTestLlama} from "../../utils/getTestLlama.js"; describe("bgeReranker", () => { describe("rank", () => { test("simple ranking", {timeout: 1000 * 60 * 60 * 2}, async (test) => { - if (process.platform !== "darwin") + if (process.platform !== "darwin" && process.arch !== "arm64") test.skip(); // the scores are a bit different on different platforms, so skipping on other platforms due to flakiness const modelPath = await getModelFile("bge-reranker-v2-m3-Q8_0.gguf"); @@ -61,7 +61,7 @@ describe("bgeReranker", () => { }); test("rank all", {timeout: 1000 * 60 * 60 * 2}, async (test) => { - if (process.platform !== "darwin") + if (process.platform !== "darwin" && process.arch !== "arm64") test.skip(); // the scores are a bit different on different platforms, so skipping on other platforms due to flakiness const modelPath = await getModelFile("bge-reranker-v2-m3-Q8_0.gguf"); @@ -115,7 +115,7 @@ describe("bgeReranker", () => { }); test("rank and sort", {timeout: 1000 * 60 * 60 * 2}, async (test) => { - if (process.platform !== "darwin") + if (process.platform !== "darwin" && process.arch !== "arm64") test.skip(); // the scores are a bit different on different platforms, so skipping on other platforms due to flakiness const modelPath = await getModelFile("bge-reranker-v2-m3-Q8_0.gguf"); From 59519f0015b3cd612d5cbf7fb0a2cc18a63dd3ff Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 5 Aug 2025 23:03:41 +0300 Subject: [PATCH 02/10] test: fix tests --- test/modelDependent/bgeReranker/rank.test.ts | 56 ++++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/test/modelDependent/bgeReranker/rank.test.ts b/test/modelDependent/bgeReranker/rank.test.ts index af7999fd..e0ffea64 100644 --- a/test/modelDependent/bgeReranker/rank.test.ts +++ b/test/modelDependent/bgeReranker/rank.test.ts @@ -43,19 +43,19 @@ describe("bgeReranker", () => { const highestRankDocument = documents[highestRankIndex]; expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world"); - expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.026596993576865856"); + expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot(`0.014774031693273055`); expect(simplifyRanks(ranks)).toMatchInlineSnapshot(` [ 0.00002039908727992137, - 0.00006772414961977023, - 0.00003716893710288947, + 0.00002039908727992137, + 0.00002039908727992137, 
0.004496273160941178, - 0.00003716893710288947, - 0.026596993576865856, - 0.00003716893710288947, + 0.00002039908727992137, + 0.014774031693273055, + 0.00002039908727992137, + 0.00002039908727992137, 0.00002039908727992137, 0.00002039908727992137, - 0.00003716893710288947, ] `); }); @@ -97,19 +97,19 @@ describe("bgeReranker", () => { const highestRankDocument = documents[highestRankIndex]; expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world"); - expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.026596993576865856"); + expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot(`0.014774031693273055`); expect(simplifyRanks(ranks)).toMatchInlineSnapshot(` [ 0.00002039908727992137, - 0.00006772414961977023, - 0.00003716893710288947, + 0.00002039908727992137, + 0.00002039908727992137, 0.004496273160941178, - 0.00003716893710288947, - 0.026596993576865856, - 0.00003716893710288947, + 0.00002039908727992137, + 0.014774031693273055, + 0.00002039908727992137, + 0.00002039908727992137, 0.00002039908727992137, 0.00002039908727992137, - 0.00003716893710288947, ] `); }); @@ -150,41 +150,41 @@ describe("bgeReranker", () => { expect(simplifySortedRanks([topDocument])[0]).toMatchInlineSnapshot(` { "document": "Mount Everest is the tallest mountain in the world", - "score": 0.026596993576865856, + "score": 0.014774031693273055, } `); expect(simplifySortedRanks(rankedDocuments)).toMatchInlineSnapshot(` [ { "document": "Mount Everest is the tallest mountain in the world", - "score": 0.026596993576865856, + "score": 0.014774031693273055, }, { "document": "The capital of France is Paris", "score": 0.004496273160941178, }, { - "document": "I love eating pizza with extra cheese", - "score": 0.00006772414961977023, + "document": "Not all the things that shine are made of gold", + "score": 0.00002039908727992137, }, { - "document": "A warm cup of tea is perfect for a cold winter day", - "score": 0.00003716893710288947, + "document": "I love eating pizza with extra cheese", + "score": 0.00002039908727992137, }, { "document": "Dogs love to play fetch with their owners", - "score": 0.00003716893710288947, + "score": 0.00002039908727992137, }, { - "document": "Cleaning the house is a good way to keep it tidy", - "score": 0.00003716893710288947, + "document": "The sky is clear and blue today", + "score": 0.00002039908727992137, }, { - "document": "Not all the things that shine are made of gold", + "document": "Cleaning the house is a good way to keep it tidy", "score": 0.00002039908727992137, }, { - "document": "The sky is clear and blue today", + "document": "A warm cup of tea is perfect for a cold winter day", "score": 0.00002039908727992137, }, ] @@ -226,12 +226,12 @@ describe("bgeReranker", () => { [ "Mount Everest is the tallest mountain in the world", "The capital of France is Paris", + "Not all the things that shine are made of gold", "I love eating pizza with extra cheese", - "A warm cup of tea is perfect for a cold winter day", "Dogs love to play fetch with their owners", - "Cleaning the house is a good way to keep it tidy", - "Not all the things that shine are made of gold", "The sky is clear and blue today", + "Cleaning the house is a good way to keep it tidy", + "A warm cup of tea is perfect for a cold winter day", ] `); }); From b648103d863370e131c4ed34c48d265ba6703b52 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Tue, 5 Aug 2025 23:05:18 +0300 Subject: [PATCH 03/10] feat: add gpt-oss architecture type --- src/gguf/types/GgufMetadataTypes.ts | 16 
++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/gguf/types/GgufMetadataTypes.ts b/src/gguf/types/GgufMetadataTypes.ts index 85a67a93..a36948fa 100644 --- a/src/gguf/types/GgufMetadataTypes.ts +++ b/src/gguf/types/GgufMetadataTypes.ts @@ -14,6 +14,7 @@ export const enum GgufArchitectureType { bert = "bert", nomicBert = "nomic-bert", nomicBertMoe = "nomic-bert-moe", + neoBert = "neo-bert", jinaBertV2 = "jina-bert-v2", bloom = "bloom", stablelm = "stablelm", @@ -27,6 +28,7 @@ export const enum GgufArchitectureType { phi3 = "phi3", phimoe = "phimoe", plamo = "plamo", + plamo2 = "plamo2", codeshell = "codeshell", orion = "orion", internlm2 = "internlm2", @@ -38,6 +40,9 @@ export const enum GgufArchitectureType { gemma3n = "gemma3n", starcoder2 = "starcoder2", mamba = "mamba", + mamba2 = "mamba2", + jamba = "jamba", + falconH1 = "falcon-h1", xverse = "xverse", commandR = "command-r", cohere2 = "cohere2", @@ -51,6 +56,7 @@ export const enum GgufArchitectureType { deepseek2 = "deepseek2", chatglm = "chatglm", glm4 = "glm4", + glm4moe = "glm4moe", bitnet = "bitnet", t5 = "t5", t5encoder = "t5encoder", @@ -64,6 +70,7 @@ export const enum GgufArchitectureType { arwkv7 = "arwkv7", granite = "granite", granitemoe = "granitemoe", + granitehybrid = "granitehybrid", chameleon = "chameleon", wavtokenizerDec = "wavtokenizer-dec", plm = "plm", @@ -71,6 +78,15 @@ export const enum GgufArchitectureType { dots1 = "dots1", arcee = "arcee", ernie4_5 = "ernie4_5", + ernie4_5Moe = "ernie4_5-moe", + hunyuanMoe = "hunyuan-moe", + hunyuanDense = "hunyuan-dense", + smollm3 = "smollm3", + gptOss = "gpt-oss", + lfm2 = "lfm2", + dream = "dream", + smallthinker = "smallthinker", + llada = "llada", clip = "clip", unknown = "(unknown)" } From 21cc5b9b30096e5a766284cda0dab08bb0e45138 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Fri, 8 Aug 2025 05:04:21 +0300 Subject: [PATCH 04/10] feat: gpt-oss support --- package-lock.json | 329 +++--- package.json | 8 +- src/bindings/Llama.ts | 2 + src/chatWrappers/HarmonyChatWrapper.ts | 709 +++++++++++++ src/chatWrappers/utils/resolveChatWrapper.ts | 30 +- src/cli/commands/ChatCommand.ts | 2 +- src/cli/commands/CompleteCommand.ts | 2 +- src/cli/commands/InfillCommand.ts | 2 +- .../inspect/commands/InspectMeasureCommand.ts | 31 +- src/evaluator/LlamaChat/LlamaChat.ts | 363 ++++++- src/gguf/insights/GgufInsights.ts | 23 +- src/gguf/types/GgufMetadataTypes.ts | 3 +- src/gguf/types/GgufTensorInfoTypes.ts | 3 +- src/gguf/utils/ggufQuantNames.ts | 1 + src/index.ts | 7 +- src/types.ts | 173 +++- src/utils/getChatWrapperSegmentDefinition.ts | 2 + test/modelDependent/bgeReranker/rank.test.ts | 4 +- .../chatWrappers/HarmonyChatWrapper.test.ts | 411 ++++++++ .../chatWrappers/utils/jinjaTemplates.ts | 954 ++++++++++++++++++ .../utils/resolveChatWrapper.test.ts | 39 +- 21 files changed, 2899 insertions(+), 199 deletions(-) create mode 100644 src/chatWrappers/HarmonyChatWrapper.ts create mode 100644 test/standalone/chatWrappers/HarmonyChatWrapper.test.ts create mode 100644 test/standalone/chatWrappers/utils/jinjaTemplates.ts diff --git a/package-lock.json b/package-lock.json index 719e8ef6..1b788ce9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -68,8 +68,8 @@ "@types/validate-npm-package-name": "^4.0.2", "@types/which": "^3.0.4", "@types/yargs": "^17.0.33", - "@vitest/coverage-v8": "^3.1.3", - "@vitest/ui": "^3.1.3", + "@vitest/coverage-v8": "^3.2.4", + "@vitest/ui": "^3.2.4", "electron": "^37.2.4", "eslint": "^9.26.0", "eslint-import-resolver-typescript": "^4.3.4", @@ 
-89,10 +89,10 @@ "typedoc-vitepress-theme": "^1.1.2", "typescript": "^5.8.3", "typescript-eslint": "^8.32.0", - "vite-node": "^3.1.3", + "vite-node": "^3.2.4", "vitepress": "^1.6.3", "vitepress-plugin-llms": "^1.7.2", - "vitest": "^3.1.3", + "vitest": "^3.2.4", "zx": "^8.5.4" }, "engines": { @@ -2230,9 +2230,9 @@ "license": "MIT" }, "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.25", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", - "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "version": "0.3.29", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.29.tgz", + "integrity": "sha512-uw6guiW/gcAGPDhLmd77/6lW8QLeiV5RUTsAX46Db6oLhGaVj4lhnPwb184s1bkc8kdVg/+h988dro8GRDpmYQ==", "dev": true, "license": "MIT", "dependencies": { @@ -4302,6 +4302,16 @@ "@types/responselike": "^1.0.0" } }, + "node_modules/@types/chai": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.2.tgz", + "integrity": "sha512-8kB30R7Hwqf40JPiKhVzodJs2Qc1ZJ5zuT3uzw5Hq/dhNCl3G3l83jfpdI1e20BP348+fV7VIL/+FxaXkqBmWg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/deep-eql": "*" + } + }, "node_modules/@types/conventional-commits-parser": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/@types/conventional-commits-parser/-/conventional-commits-parser-5.0.1.tgz", @@ -4332,6 +4342,13 @@ "@types/ms": "*" } }, + "node_modules/@types/deep-eql": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/deep-eql/-/deep-eql-4.0.2.tgz", + "integrity": "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/estree": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz", @@ -5016,15 +5033,16 @@ } }, "node_modules/@vitest/coverage-v8": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@vitest/coverage-v8/-/coverage-v8-3.1.3.tgz", - "integrity": "sha512-cj76U5gXCl3g88KSnf80kof6+6w+K4BjOflCl7t6yRJPDuCrHtVu0SgNYOUARJOL5TI8RScDbm5x4s1/P9bvpw==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/coverage-v8/-/coverage-v8-3.2.4.tgz", + "integrity": "sha512-EyF9SXU6kS5Ku/U82E259WSnvg6c8KTjppUncuNdm5QHpe17mwREHnjDzozC8x9MZ0xfBUFSaLkRv4TMA75ALQ==", "dev": true, "license": "MIT", "dependencies": { "@ampproject/remapping": "^2.3.0", "@bcoe/v8-coverage": "^1.0.2", - "debug": "^4.4.0", + "ast-v8-to-istanbul": "^0.3.3", + "debug": "^4.4.1", "istanbul-lib-coverage": "^3.2.2", "istanbul-lib-report": "^3.0.1", "istanbul-lib-source-maps": "^5.0.6", @@ -5039,8 +5057,8 @@ "url": "https://opencollective.com/vitest" }, "peerDependencies": { - "@vitest/browser": "3.1.3", - "vitest": "3.1.3" + "@vitest/browser": "3.2.4", + "vitest": "3.2.4" }, "peerDependenciesMeta": { "@vitest/browser": { @@ -5049,14 +5067,15 @@ } }, "node_modules/@vitest/expect": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-3.1.3.tgz", - "integrity": "sha512-7FTQQuuLKmN1Ig/h+h/GO+44Q1IlglPlR2es4ab7Yvfx+Uk5xsv+Ykk+MEt/M2Yn/xGmzaLKxGw2lgy2bwuYqg==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-3.2.4.tgz", + "integrity": "sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/spy": 
"3.1.3", - "@vitest/utils": "3.1.3", + "@types/chai": "^5.2.2", + "@vitest/spy": "3.2.4", + "@vitest/utils": "3.2.4", "chai": "^5.2.0", "tinyrainbow": "^2.0.0" }, @@ -5064,47 +5083,10 @@ "url": "https://opencollective.com/vitest" } }, - "node_modules/@vitest/mocker": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-3.1.3.tgz", - "integrity": "sha512-PJbLjonJK82uCWHjzgBJZuR7zmAOrSvKk1QBxrennDIgtH4uK0TB1PvYmc0XBCigxxtiAVPfWtAdy4lpz8SQGQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/spy": "3.1.3", - "estree-walker": "^3.0.3", - "magic-string": "^0.30.17" - }, - "funding": { - "url": "https://opencollective.com/vitest" - }, - "peerDependencies": { - "msw": "^2.4.9", - "vite": "^5.0.0 || ^6.0.0" - }, - "peerDependenciesMeta": { - "msw": { - "optional": true - }, - "vite": { - "optional": true - } - } - }, - "node_modules/@vitest/mocker/node_modules/estree-walker": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", - "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0" - } - }, "node_modules/@vitest/pretty-format": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-3.1.3.tgz", - "integrity": "sha512-i6FDiBeJUGLDKADw2Gb01UtUNb12yyXAqC/mmRWuYl+m/U9GS7s8us5ONmGkGpUUo7/iAYzI2ePVfOZTYvUifA==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-3.2.4.tgz", + "integrity": "sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==", "dev": true, "license": "MIT", "dependencies": { @@ -5115,27 +5097,28 @@ } }, "node_modules/@vitest/runner": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-3.1.3.tgz", - "integrity": "sha512-Tae+ogtlNfFei5DggOsSUvkIaSuVywujMj6HzR97AHK6XK8i3BuVyIifWAm/sE3a15lF5RH9yQIrbXYuo0IFyA==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-3.2.4.tgz", + "integrity": "sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/utils": "3.1.3", - "pathe": "^2.0.3" + "@vitest/utils": "3.2.4", + "pathe": "^2.0.3", + "strip-literal": "^3.0.0" }, "funding": { "url": "https://opencollective.com/vitest" } }, "node_modules/@vitest/snapshot": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-3.1.3.tgz", - "integrity": "sha512-XVa5OPNTYUsyqG9skuUkFzAeFnEzDp8hQu7kZ0N25B1+6KjGm4hWLtURyBbsIAOekfWQ7Wuz/N/XXzgYO3deWQ==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-3.2.4.tgz", + "integrity": "sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/pretty-format": "3.1.3", + "@vitest/pretty-format": "3.2.4", "magic-string": "^0.30.17", "pathe": "^2.0.3" }, @@ -5144,49 +5127,49 @@ } }, "node_modules/@vitest/spy": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-3.1.3.tgz", - "integrity": "sha512-x6w+ctOEmEXdWaa6TO4ilb7l9DxPR5bwEb6hILKuxfU1NqWT2mpJD9NJN7t3OTfxmVlOMrvtoFJGdgyzZ605lQ==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-3.2.4.tgz", + "integrity": 
"sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==", "dev": true, "license": "MIT", "dependencies": { - "tinyspy": "^3.0.2" + "tinyspy": "^4.0.3" }, "funding": { "url": "https://opencollective.com/vitest" } }, "node_modules/@vitest/ui": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@vitest/ui/-/ui-3.1.3.tgz", - "integrity": "sha512-IipSzX+8DptUdXN/GWq3hq5z18MwnpphYdOMm0WndkRGYELzfq7NDP8dMpZT7JGW1uXFrIGxOW2D0Xi++ulByg==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/ui/-/ui-3.2.4.tgz", + "integrity": "sha512-hGISOaP18plkzbWEcP/QvtRW1xDXF2+96HbEX6byqQhAUbiS5oH6/9JwW+QsQCIYON2bI6QZBF+2PvOmrRZ9wA==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/utils": "3.1.3", + "@vitest/utils": "3.2.4", "fflate": "^0.8.2", "flatted": "^3.3.3", "pathe": "^2.0.3", "sirv": "^3.0.1", - "tinyglobby": "^0.2.13", + "tinyglobby": "^0.2.14", "tinyrainbow": "^2.0.0" }, "funding": { "url": "https://opencollective.com/vitest" }, "peerDependencies": { - "vitest": "3.1.3" + "vitest": "3.2.4" } }, "node_modules/@vitest/utils": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-3.1.3.tgz", - "integrity": "sha512-2Ltrpht4OmHO9+c/nmHtF09HWiyWdworqnHIwjfvDyWjuwKbdkcS9AnhsDn+8E2RM4x++foD1/tNuLPVvWG1Rg==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-3.2.4.tgz", + "integrity": "sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/pretty-format": "3.1.3", - "loupe": "^3.1.3", + "@vitest/pretty-format": "3.2.4", + "loupe": "^3.1.4", "tinyrainbow": "^2.0.0" }, "funding": { @@ -5857,6 +5840,35 @@ "node": ">=12" } }, + "node_modules/ast-v8-to-istanbul": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/ast-v8-to-istanbul/-/ast-v8-to-istanbul-0.3.4.tgz", + "integrity": "sha512-cxrAnZNLBnQwBPByK4CeDaw5sWZtMilJE/Q3iDA0aamgaIVNDF9T6K2/8DfYDZEejZ2jNnDrG9m8MY72HFd0KA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.29", + "estree-walker": "^3.0.3", + "js-tokens": "^9.0.1" + } + }, + "node_modules/ast-v8-to-istanbul/node_modules/estree-walker": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", + "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, + "node_modules/ast-v8-to-istanbul/node_modules/js-tokens": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz", + "integrity": "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==", + "dev": true, + "license": "MIT" + }, "node_modules/async-retry": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.3.tgz", @@ -6187,9 +6199,9 @@ } }, "node_modules/chai": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/chai/-/chai-5.2.0.tgz", - "integrity": "sha512-mCuXncKXk5iCLhfhwTc0izo0gtEmpz5CtG2y8GiOINBlMVS6v8TMRc5TaLWKS6692m9+dVVfzgeVxR5UxWHTYw==", + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/chai/-/chai-5.2.1.tgz", + "integrity": "sha512-5nFxhUrX0PqtyogoYOA8IPswy5sZFTOsBFl/9bNsmDLgsxYTzSZQJDPppDnZPTQbzSEm0hqGjWPzRemQCYbD6A==", "dev": true, "license": "MIT", "dependencies": { @@ -6200,7 
+6212,7 @@ "pathval": "^2.0.0" }, "engines": { - "node": ">=12" + "node": ">=18" } }, "node_modules/chalk": { @@ -7186,9 +7198,9 @@ "license": "MIT" }, "node_modules/debug": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz", - "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", + "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", "license": "MIT", "dependencies": { "ms": "^2.1.3" @@ -8762,9 +8774,9 @@ } }, "node_modules/expect-type": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.2.1.tgz", - "integrity": "sha512-/kP8CAwxzLVEeFrMm4kMmy4CCDlpipyA7MYLVrdJIkV0fYF0UaigQHRsxHiuY/GEea+bh4KSv3TIlgr+2UL6bw==", + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.2.2.tgz", + "integrity": "sha512-JhFGDVJ7tmDJItKhYgJCGLOWjuK9vPxiXoUFLwLDc99NlmklilbiQJwoctZtt13+xMw91MCk/REan6MWHqDjyA==", "dev": true, "license": "Apache-2.0", "engines": { @@ -11771,9 +11783,9 @@ } }, "node_modules/loupe": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.1.3.tgz", - "integrity": "sha512-kkIp7XSkP78ZxJEsSxW3712C6teJVoeHHwgo9zJ380de7IYyJ2ISlxojcH2pC5OFLewESmnRi/+XCDIEEVyoug==", + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.0.tgz", + "integrity": "sha512-2NCfZcT5VGVNX9mSZIxLRkEAegDGBpuQZBy13desuHeVORmBDyAET4TkJr4SjqQy3A8JDofMN6LpkK8Xcm/dlw==", "dev": true, "license": "MIT" }, @@ -16582,9 +16594,9 @@ "license": "MIT" }, "node_modules/pathval": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.0.tgz", - "integrity": "sha512-vE7JKRyES09KiunauX7nd2Q9/L7lhok4smP9RZTDeD4MVs72Dp2qNFVz39Nz5a0FVEW0BJR6C0DYrq6unoziZA==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.1.tgz", + "integrity": "sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==", "dev": true, "license": "MIT", "engines": { @@ -19002,6 +19014,26 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/strip-literal": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.0.0.tgz", + "integrity": "sha512-TcccoMhJOM3OebGhSBEmp3UZ2SfDMZUEBdRA/9ynfLi8yYajyWX3JiXArcJt4Umh4vISpspkQIY8ZZoCqjbviA==", + "dev": true, + "license": "MIT", + "dependencies": { + "js-tokens": "^9.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/strip-literal/node_modules/js-tokens": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz", + "integrity": "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==", + "dev": true, + "license": "MIT" + }, "node_modules/sumchecker": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/sumchecker/-/sumchecker-3.0.1.tgz", @@ -19409,9 +19441,9 @@ "license": "MIT" }, "node_modules/tinyglobby": { - "version": "0.2.13", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.13.tgz", - "integrity": "sha512-mEwzpUgrLySlveBwEVDMKk5B57bhLPYovRfPAXD5gA/98Opn0rCDj3GtLwFvCvH5RK9uPCExUROW5NjDwvqkxw==", + "version": "0.2.14", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz", + "integrity": 
"sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==", "dev": true, "license": "MIT", "dependencies": { @@ -19426,9 +19458,9 @@ } }, "node_modules/tinypool": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.0.2.tgz", - "integrity": "sha512-al6n+QEANGFOMf/dmUMsuS5/r9B06uwlyNjZZql/zv8J7ybHCgoihBNORZCY2mzUuAnomQa2JdhyHKzZxPCrFA==", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.1.1.tgz", + "integrity": "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==", "dev": true, "license": "MIT", "engines": { @@ -19446,9 +19478,9 @@ } }, "node_modules/tinyspy": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-3.0.2.tgz", - "integrity": "sha512-n1cw8k1k0x4pgA2+9XrOkFydTerNcJ1zWCO5Nn9scWHTD+5tp8dghT2x1uduQePZTZgd3Tupf+x9BxJjeJi77Q==", + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-4.0.3.tgz", + "integrity": "sha512-t2T/WLB2WRgZ9EpE4jgPJ9w+i66UZfDc8wHh0xrwiRNN+UwH98GIJkTeZqX9rg0i0ptwzqW+uYeIF0T4F8LR7A==", "dev": true, "license": "MIT", "engines": { @@ -20307,17 +20339,17 @@ } }, "node_modules/vite-node": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-3.1.3.tgz", - "integrity": "sha512-uHV4plJ2IxCl4u1up1FQRrqclylKAogbtBfOTwcuJ28xFi+89PZ57BRh+naIRvH70HPwxy5QHYzg1OrEaC7AbA==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-3.2.4.tgz", + "integrity": "sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==", "dev": true, "license": "MIT", "dependencies": { "cac": "^6.7.14", - "debug": "^4.4.0", + "debug": "^4.4.1", "es-module-lexer": "^1.7.0", "pathe": "^2.0.3", - "vite": "^5.0.0 || ^6.0.0" + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" }, "bin": { "vite-node": "vite-node.mjs" @@ -20474,32 +20506,34 @@ } }, "node_modules/vitest": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.1.3.tgz", - "integrity": "sha512-188iM4hAHQ0km23TN/adso1q5hhwKqUpv+Sd6p5sOuh6FhQnRNW3IsiIpvxqahtBabsJ2SLZgmGSpcYK4wQYJw==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", + "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", "dev": true, "license": "MIT", "dependencies": { - "@vitest/expect": "3.1.3", - "@vitest/mocker": "3.1.3", - "@vitest/pretty-format": "^3.1.3", - "@vitest/runner": "3.1.3", - "@vitest/snapshot": "3.1.3", - "@vitest/spy": "3.1.3", - "@vitest/utils": "3.1.3", + "@types/chai": "^5.2.2", + "@vitest/expect": "3.2.4", + "@vitest/mocker": "3.2.4", + "@vitest/pretty-format": "^3.2.4", + "@vitest/runner": "3.2.4", + "@vitest/snapshot": "3.2.4", + "@vitest/spy": "3.2.4", + "@vitest/utils": "3.2.4", "chai": "^5.2.0", - "debug": "^4.4.0", + "debug": "^4.4.1", "expect-type": "^1.2.1", "magic-string": "^0.30.17", "pathe": "^2.0.3", + "picomatch": "^4.0.2", "std-env": "^3.9.0", "tinybench": "^2.9.0", "tinyexec": "^0.3.2", - "tinyglobby": "^0.2.13", - "tinypool": "^1.0.2", + "tinyglobby": "^0.2.14", + "tinypool": "^1.1.1", "tinyrainbow": "^2.0.0", - "vite": "^5.0.0 || ^6.0.0", - "vite-node": "3.1.3", + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0", + "vite-node": "3.2.4", "why-is-node-running": "^2.3.0" }, "bin": { @@ -20515,8 +20549,8 @@ "@edge-runtime/vm": "*", "@types/debug": "^4.1.12", "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", - 
"@vitest/browser": "3.1.3", - "@vitest/ui": "3.1.3", + "@vitest/browser": "3.2.4", + "@vitest/ui": "3.2.4", "happy-dom": "*", "jsdom": "*" }, @@ -20544,6 +20578,43 @@ } } }, + "node_modules/vitest/node_modules/@vitest/mocker": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-3.2.4.tgz", + "integrity": "sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/spy": "3.2.4", + "estree-walker": "^3.0.3", + "magic-string": "^0.30.17" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "msw": "^2.4.9", + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" + }, + "peerDependenciesMeta": { + "msw": { + "optional": true + }, + "vite": { + "optional": true + } + } + }, + "node_modules/vitest/node_modules/estree-walker": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", + "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, "node_modules/vitest/node_modules/tinyexec": { "version": "0.3.2", "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-0.3.2.tgz", diff --git a/package.json b/package.json index bbaeabc5..d5a819e3 100644 --- a/package.json +++ b/package.json @@ -155,8 +155,8 @@ "@types/validate-npm-package-name": "^4.0.2", "@types/which": "^3.0.4", "@types/yargs": "^17.0.33", - "@vitest/coverage-v8": "^3.1.3", - "@vitest/ui": "^3.1.3", + "@vitest/coverage-v8": "^3.2.4", + "@vitest/ui": "^3.2.4", "electron": "^37.2.4", "eslint": "^9.26.0", "eslint-import-resolver-typescript": "^4.3.4", @@ -176,10 +176,10 @@ "typedoc-vitepress-theme": "^1.1.2", "typescript": "^5.8.3", "typescript-eslint": "^8.32.0", - "vite-node": "^3.1.3", + "vite-node": "^3.2.4", "vitepress": "^1.6.3", "vitepress-plugin-llms": "^1.7.2", - "vitest": "^3.1.3", + "vitest": "^3.2.4", "zx": "^8.5.4" }, "dependencies": { diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts index c4ceb32b..f48f5474 100644 --- a/src/bindings/Llama.ts +++ b/src/bindings/Llama.ts @@ -664,6 +664,8 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string): LlamaLog return LlamaLogLevel.info; else if (level === LlamaLogLevel.warn && message.startsWith("init: embeddings required but some input tokens were not marked as outputs -> overriding")) return LlamaLogLevel.info; + else if (level === LlamaLogLevel.warn && message.startsWith("load: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list")) + return LlamaLogLevel.info; return level; } diff --git a/src/chatWrappers/HarmonyChatWrapper.ts b/src/chatWrappers/HarmonyChatWrapper.ts new file mode 100644 index 00000000..3dfadf9a --- /dev/null +++ b/src/chatWrappers/HarmonyChatWrapper.ts @@ -0,0 +1,709 @@ +import {ChatWrapper, ChatWrapperJinjaMatchConfiguration} from "../ChatWrapper.js"; +import { + ChatModelFunctions, ChatModelResponse, ChatWrapperGenerateContextStateOptions, ChatWrapperGeneratedContextState, + ChatWrapperGeneratedPrefixTriggersContextState, ChatWrapperSettings +} from "../types.js"; +import {SpecialToken, LlamaText, SpecialTokensText} from "../utils/LlamaText.js"; +import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctionsDocumentationGenerator.js"; +import {jsonDumps} from "./utils/jsonDumps.js"; + +const 
defaultModelIdentity = "You are ChatGPT, a large language model trained by OpenAI."; +const defaultCuttingKnowledgeDate = new Date("2024-06-01T00:00:00Z"); +const defaultReasoningEffort = "medium"; + +// source: https://github.com/openai/harmony, https://cookbook.openai.com/articles/openai-harmony, +// https://github.com/openai/openai-cookbook/blob/main/articles/openai-harmony.md +export class HarmonyChatWrapper extends ChatWrapper { + public readonly wrapperName: string = "Harmony"; + + public readonly modelIdentity: string | null; + public readonly cuttingKnowledgeDate?: Date | (() => Date) | null; + public readonly todayDate: Date | (() => Date) | null; + public readonly reasoningEffort: "high" | "medium" | "low" | null; + public readonly requiredChannels: { + analysis: boolean, + commentary: boolean, + final: boolean + }; + public readonly keepOnlyLastThought: boolean; + + /** @internal */ private readonly _jinjaFlags: JinjaMatchFlags; + + public override readonly settings: ChatWrapperSettings = { + supportsSystemMessages: true, + functions: { + call: { + optionalPrefixSpace: true, + prefix: LlamaText(new SpecialTokensText(" to="), "functions."), + paramsPrefix: LlamaText(new SpecialTokensText("<|constrain|>json<|message|>")), + suffix: LlamaText(new SpecialTokensText("<|call|>")), + emptyCallParamsPlaceholder: {} + }, + result: { + prefix: LlamaText(new SpecialTokensText("<|start|>"), "functions.{{functionName}}", new SpecialTokensText(" to=assistant<|channel|>commentary<|message|>")), + suffix: LlamaText(new SpecialTokensText("<|end|>")) + } + }, + segments: { + thought: { + prefix: LlamaText(new SpecialTokensText("<|channel|>analysis<|message|>")), + suffix: LlamaText(new SpecialTokensText("<|end|>")) + }, + comment: { + prefix: LlamaText(new SpecialTokensText("<|channel|>commentary<|message|>")), + suffix: LlamaText(new SpecialTokensText("<|end|>")) + } + } + }; + + public constructor(options: { + /** + * The model identity to use in the internal system message. + * + * Set to `null` to disable. + * + * Defaults to `"You are ChatGPT, a large language model trained by OpenAI."` + */ + modelIdentity?: string | null, + + /** + * Set to `null` to disable + * + * Defaults to `new Date("2024-06-01T00:00:00Z")` + */ + cuttingKnowledgeDate?: Date | (() => Date) | number | string | null, + + /** + * Set to `null` to disable + * + * Defaults to the current date + */ + todayDate?: Date | (() => Date) | number | string | null, + + /** + * The amount of reasoning to instruct the model to use. + * + * Not enforced, it's up to the model to follow this instruction. + * + * Set to `null` to omit the instruction. + * + * Defaults to `"medium"`. + */ + reasoningEffort?: "high" | "medium" | "low" | null, + + requiredChannels?: { + /** + * Defaults to `true` + */ + analysis?: boolean, + + /** + * Defaults to `true` + */ + commentary?: boolean, + + /** + * Defaults to `true` + */ + final?: boolean + }, + + /** + * Whether to keep only the chain of thought from the last model response. + * + * Setting this to `false` will keep all the chain of thoughts from the model responses in the context state. + * + * Defaults to `true`. 
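+         *
+         * A minimal illustration (hypothetical usage, not part of the original docs):
+         *
+         * ```ts
+         * // keep the full chain of thought from every model turn instead
+         * const wrapper = new HarmonyChatWrapper({keepOnlyLastThought: false});
+         * ```
+         *
+         * With the default (`true`), thought ("analysis") segments from earlier model
+         * turns are omitted when the chat history is rendered back into the context,
+         * and only the latest turn's thought is kept, in line with the Harmony format's
+         * convention of dropping prior-turn chain of thought.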
+ */ + keepOnlyLastThought?: boolean, + + /** @internal */ + _jinjaFlags?: JinjaMatchFlags + } = {}) { + super(); + + const { + modelIdentity = defaultModelIdentity, + cuttingKnowledgeDate = defaultCuttingKnowledgeDate, + todayDate = () => new Date(), + reasoningEffort = defaultReasoningEffort, + requiredChannels = {}, + keepOnlyLastThought = true, + + _jinjaFlags = {} + } = options; + + this.modelIdentity = modelIdentity; + this.cuttingKnowledgeDate = cuttingKnowledgeDate == null + ? null + : cuttingKnowledgeDate instanceof Function + ? cuttingKnowledgeDate + : new Date(cuttingKnowledgeDate); + this.todayDate = todayDate == null + ? null + : todayDate instanceof Function + ? todayDate + : new Date(todayDate); + this.reasoningEffort = reasoningEffort; + this.requiredChannels = { + analysis: requiredChannels.analysis ?? true, + commentary: requiredChannels.commentary ?? true, + final: requiredChannels.final ?? true + }; + this.keepOnlyLastThought = keepOnlyLastThought; + + this._jinjaFlags = { + emptyLastModelResponseIsFinalMessage: false, + useSpecialTokensForFullSystemMessage: false, + disableNonFinalFinalMessages: false, + useNonFinalFinalMessage: false, + noFinalMessages: false, + ..._jinjaFlags + }; + } + + public override generateContextState({ + chatHistory, availableFunctions, documentFunctionParams + }: ChatWrapperGenerateContextStateOptions): ChatWrapperGeneratedContextState { + const hasFunctions = Object.keys(availableFunctions ?? {}).length > 0; + const modifiedChatHistory = chatHistory.slice(); + + let systemMessage: LlamaText = LlamaText(); + if (modifiedChatHistory[0]?.type === "system") { + systemMessage = LlamaText.fromJSON(modifiedChatHistory[0].text); + modifiedChatHistory.shift(); + } + + const contextContent: LlamaText[] = [ + this._getPreamble(hasFunctions) + ]; + + if (systemMessage.values.length > 0 || hasFunctions) + contextContent.push( + LlamaText([ + new SpecialTokensText("<|start|>developer<|message|>"), + this._getFirstDeveloperMessage(systemMessage, availableFunctions, {documentParams: documentFunctionParams}), + new SpecialTokensText("<|end|>") + ]) + ); + + let needsTriggers = true; + for (let i = 0; i < modifiedChatHistory.length; i++) { + const isLastItem = i === modifiedChatHistory.length - 1; + const item = modifiedChatHistory[i]; + + if (item == null) + continue; + + if (item.type === "system") { + contextContent.push( + LlamaText([ + new SpecialTokensText("<|start|>developer<|message|>"), + LlamaText.fromJSON(item.text), + isLastItem + ? LlamaText([]) + : new SpecialTokensText("<|end|>") + ]) + ); + + if (isLastItem) + needsTriggers = false; + } else if (item.type === "user") { + contextContent.push( + LlamaText([ + new SpecialTokensText("<|start|>user<|message|>"), + item.text, + isLastItem + ? 
LlamaText([]) + : new SpecialTokensText("<|end|>") + ]) + ); + + if (isLastItem) + needsTriggers = false; + } else if (item.type === "model") { + const { + res, needsTriggers: modelNeedsTriggers + } = this._getModelResponse(item.response, true, isLastItem, this.keepOnlyLastThought); + + if (isLastItem) + needsTriggers = modelNeedsTriggers; + + contextContent.push(res); + } else + void (item satisfies never); + } + + const contextText = LlamaText(contextContent); + + if (!needsTriggers) + return { + contextText, + stopGenerationTriggers: [ + LlamaText(new SpecialToken("EOS")), + LlamaText(new SpecialTokensText("<|return|>")), + LlamaText("<|return|>") + ], + detectFunctionCalls: false, + rerender: { + triggers: [ + LlamaText(new SpecialTokensText("<|end|>")) + ], + action: "closeResponseItem" + } + }; + + return { + contextText, + stopGenerationTriggers: [ + LlamaText(new SpecialToken("EOS")), + LlamaText(new SpecialTokensText("<|return|>")), + LlamaText("<|return|>") + ], + prefixTriggers: [ + { + type: "segment", + segmentType: "thought", + triggers: [ + LlamaText(new SpecialTokensText("<|channel|>analysis<|message|>")) + ] + }, { + type: "segment", + segmentType: "comment", + triggers: [ + // the trigger here includes the `<|message|>` part + // to not conflict with the `<|channel|>commentary to=` prefix used for function calls + LlamaText(new SpecialTokensText("<|channel|>commentary<|message|>")) + ] + }, { + type: "response", + triggers: [ + LlamaText(new SpecialTokensText("<|channel|>final")) + ], + inject: LlamaText(new SpecialTokensText("<|message|>")) + }, + ...( + !hasFunctions ? [] : [{ + type: "functionCall", + triggers: [ + LlamaText(new SpecialTokensText("<|channel|>commentary to=")) + ], + replaceTrigger: true, + inject: LlamaText(new SpecialTokensText("<|channel|>commentary")) + }, { + type: "functionCall", + triggers: [ + LlamaText(new SpecialTokensText("<|channel|>analysis to=")) + ], + replaceTrigger: true, + inject: LlamaText(new SpecialTokensText("<|channel|>analysis")) + }] satisfies ChatWrapperGeneratedPrefixTriggersContextState["prefixTriggers"] + ) + ], + noPrefixTrigger: { + type: "response", + inject: LlamaText(new SpecialTokensText("<|channel|>final<|message|>")) + }, + detectFunctionCalls: false, + rerender: { + triggers: [ + LlamaText(new SpecialTokensText("<|end|>")) + ], + action: "closeResponseItem" + } + }; + } + + public override generateFunctionCall(name: string, params: any): LlamaText { + const emptyCallParamsPlaceholder = this.settings.functions.call.emptyCallParamsPlaceholder; + return LlamaText([ + new SpecialTokensText("<|start|>assistant<|channel|>commentary to="), + "functions.", name, + this.settings.functions.call.paramsPrefix, + params === undefined + ? (emptyCallParamsPlaceholder === undefined || emptyCallParamsPlaceholder === "") + ? "" + : jsonDumps(emptyCallParamsPlaceholder) + : jsonDumps(params), + this.settings.functions.call.suffix + ]); + } + + public override generateFunctionCallResult(functionName: string, functionParams: any, result: any): LlamaText { + return LlamaText([ + new SpecialTokensText("<|start|>"), + "functions.", functionName, + new SpecialTokensText(" to=assistant<|channel|>commentary<|message|>"), + ( + result === undefined + ? 
"" + : jsonDumps(result) + ), + new SpecialTokensText("<|end|>") + ]); + } + + public override generateModelResponseText(modelResponse: ChatModelResponse["response"], useRawValues: boolean = true): LlamaText { + const {res} = this._getModelResponse(modelResponse, useRawValues, false, false); + const [start, ...rest] = res.values; + let newStart = start; + let newEnd = rest.pop(); + + if (newStart instanceof SpecialTokensText && newStart.value.startsWith("<|start|>assistant")) + newStart = new SpecialTokensText(newStart.value.slice("<|start|>assistant".length)); + + if (newEnd instanceof SpecialTokensText && newEnd.value.startsWith("<|end|>")) + newEnd = new SpecialTokensText(newEnd.value.slice("<|end|>".length)); + else if (newEnd instanceof SpecialTokensText && newEnd.value.startsWith("<|return|>")) + newEnd = new SpecialTokensText(newEnd.value.slice("<|return|>".length)); + + return LlamaText([ + newStart ?? [], + ...rest, + newEnd ?? [] + ]); + } + + public override generateAvailableFunctionsSystemText(availableFunctions: ChatModelFunctions, {documentParams = true}: { + documentParams?: boolean + }) { + const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions); + + if (!functionsDocumentationGenerator.hasAnyFunctions) + return LlamaText([]); + + return LlamaText.joinValues("\n", [ + "# Tools", + "", + "## functions", + "", + "namespace functions {", + "", + functionsDocumentationGenerator + .getTypeScriptFunctionTypes({documentParams}) + .split("\n") + .map((line) => line.trim()) + .join("\n"), + "", + "} // namespace functions" + ]); + } + + /** @internal */ + private _getFirstDeveloperMessage( + systemPrompt: LlamaText, + availableFunctions?: ChatModelFunctions, + {documentParams = true}: {documentParams?: boolean} = {} + ) { + const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions); + + if (!functionsDocumentationGenerator.hasAnyFunctions && systemPrompt.values.length === 0) + return LlamaText([]); + + if (!functionsDocumentationGenerator.hasAnyFunctions) + return LlamaText([ + this._jinjaFlags.useSpecialTokensForFullSystemMessage + ? new SpecialTokensText("# Instructions\n\n") + : "# Instruction\n\n", + systemPrompt + ]); + + return LlamaText([ + this._jinjaFlags.useSpecialTokensForFullSystemMessage + ? new SpecialTokensText("# Instructions\n\n") + : "# Instructions\n\n", + systemPrompt.values.length > 0 + ? [systemPrompt, "\n\n"] + : [], + this.generateAvailableFunctionsSystemText(availableFunctions ?? 
{}, {documentParams}) + ]); + } + + /** @internal */ + private _getModelResponse( + modelResponse: ChatModelResponse["response"], + useRawValues: boolean, + isLastItem: boolean, + keepOnlyLastThought: boolean + ) { + const res: LlamaText[] = []; + let canEnableTriggers = true; + + for (let index = 0; index < modelResponse.length; index++) { + const isLastResponse = index === modelResponse.length - 1; + const response = modelResponse[index]; + + if (response == null) + continue; + else if (response === "" && (!isLastResponse || !isLastItem)) + continue; + + if (typeof response === "string") { + if (isLastItem && isLastResponse) { + if (response === "" && !this._jinjaFlags.emptyLastModelResponseIsFinalMessage) + canEnableTriggers = true; + else if (!this._jinjaFlags.useNonFinalFinalMessage) { + res.push( + LlamaText([ + new SpecialTokensText("<|start|>assistant<|channel|>final<|message|>"), + response + ]) + ); + canEnableTriggers = false; + } else { + res.push( + LlamaText([ + new SpecialTokensText("<|start|>assistant<|message|>"), + response + ]) + ); + canEnableTriggers = false; + } + } else if (!this._jinjaFlags.noFinalMessages && (isLastResponse || !this._jinjaFlags.disableNonFinalFinalMessages)) + res.push( + LlamaText([ + new SpecialTokensText("<|start|>assistant<|channel|>final<|message|>"), + response, + new SpecialTokensText("<|end|>") + ]) + ); + else + res.push( + LlamaText([ + new SpecialTokensText("<|start|>assistant<|message|>"), + response, + new SpecialTokensText("<|end|>") + ]) + ); + } else if (response.type === "segment") { + if (response.ended && response.raw != null && useRawValues) + res.push(LlamaText([ + new SpecialTokensText("<|start|>assistant"), + LlamaText.fromJSON(response.raw) + ])); + else if (response.segmentType === "thought") { + if (keepOnlyLastThought && !isLastItem) + continue; + + res.push( + LlamaText([ + new SpecialTokensText("<|start|>assistant<|channel|>analysis<|message|>"), + response.text, + (isLastItem && !response.ended) + ? LlamaText([]) + : new SpecialTokensText("<|end|>") + ]) + ); + + if (isLastItem && isLastResponse && !response.ended) + canEnableTriggers = false; + } else if (response.segmentType === "comment") { + res.push( + LlamaText([ + new SpecialTokensText("<|start|>assistant<|channel|>commentary<|message|>"), + response.text, + (isLastItem && !response.ended) + ? LlamaText([]) + : new SpecialTokensText("<|end|>") + ]) + ); + + if (isLastItem && isLastResponse && !response.ended) + canEnableTriggers = false; + } else + void (response.segmentType satisfies never); + } else if (response.type === "functionCall") { + res.push( + LlamaText([ + (response.rawCall != null && useRawValues) + ? 
LlamaText.fromJSON(response.rawCall) + : this.generateFunctionCall(response.name, response.params), + this.generateFunctionCallResult(response.name, response.params, response.result) + ]) + ); + } else + void (response satisfies never); + } + + const needsTriggers = canEnableTriggers && isLastItem; + if (needsTriggers) + res.push( + LlamaText([ + new SpecialTokensText("<|start|>assistant") + ]) + ); + + return { + res: LlamaText(res), + needsTriggers + }; + } + + /** @internal */ + private _getPreamble(hasFunctions: boolean) { + const formatCutoff = (date: Date, timezone?: "UTC") => { + const month = date.toLocaleDateString("en-US", {month: "numeric", timeZone: timezone}).padStart(2, "0"); + const year = date.toLocaleDateString("en-US", {year: "numeric", timeZone: timezone}).padStart(4, "0"); + return `${year}-${month}`; + }; + + const lines: string[] = []; + + if (this.modelIdentity != null && this.modelIdentity !== "") + lines.push(this.modelIdentity); + + if (this.cuttingKnowledgeDate != null) { + const date = this.cuttingKnowledgeDate instanceof Function + ? this.cuttingKnowledgeDate() + : this.cuttingKnowledgeDate; + + lines.push(`Knowledge cutoff: ${formatCutoff(date, "UTC")}`); + + if (this._jinjaFlags.formatting === 1) + lines.push([lines.shift(), lines.shift()].filter(Boolean).join("")); + } + + if (this.todayDate != null) { + const date = this.todayDate instanceof Function + ? this.todayDate() + : this.todayDate; + lines.push(`Current date: ${formatDate(date, undefined)}`); + } + + if (this.reasoningEffort != null) { + if (lines.length > 0) + lines.push(""); + + if (this._jinjaFlags.formatting === 1) + lines.push(`reasoning: ${this.reasoningEffort}`); + else + lines.push(`Reasoning: ${this.reasoningEffort}`); + } + + if (this.requiredChannels.analysis || this.requiredChannels.commentary || this.requiredChannels.final) { + const channels: string[] = [ + ...(this.requiredChannels.analysis ? ["analysis"] : []), + ...(this.requiredChannels.commentary ? ["commentary"] : []), + ...(this.requiredChannels.final ? ["final"] : []) + ]; + + if (lines.length > 0) + lines.push(""); + + lines.push(`# Valid channels: ${channels.join(", ")}. Channel must be included for every message.`); + + if ((this.requiredChannels.commentary && hasFunctions) || this._jinjaFlags.formatting === 1) + lines.push("Calls to these tools must go to the commentary channel: 'functions'."); + } + + return LlamaText([ + new SpecialTokensText("<|start|>system<|message|>"), + this._jinjaFlags.useSpecialTokensForFullSystemMessage + ? 
new SpecialTokensText(lines.join("\n")) + : lines.join("\n"), + new SpecialTokensText("<|end|>") + ]); + } + + /** @internal */ + public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration { + const jinjaParameters = { + "model_identity": defaultModelIdentity, + "reasoning_effort": defaultReasoningEffort + }; + + return [ + [{}, {}], + [{_jinjaFlags: {emptyLastModelResponseIsFinalMessage: true}}, {}], + [{}, {}, {additionalRenderParameters: jinjaParameters}], + [{_jinjaFlags: {emptyLastModelResponseIsFinalMessage: true}}, {}, {additionalRenderParameters: jinjaParameters}], + [{_jinjaFlags: {useSpecialTokensForFullSystemMessage: true}}, {}, {additionalRenderParameters: jinjaParameters}], + [ + {_jinjaFlags: {emptyLastModelResponseIsFinalMessage: true, useSpecialTokensForFullSystemMessage: true}}, + {}, + {additionalRenderParameters: jinjaParameters} + ], + [ + { + _jinjaFlags: { + emptyLastModelResponseIsFinalMessage: true, + useSpecialTokensForFullSystemMessage: true, + disableNonFinalFinalMessages: true + } + }, + {}, + {additionalRenderParameters: jinjaParameters} + ], + [ + { + _jinjaFlags: { + emptyLastModelResponseIsFinalMessage: true, + useSpecialTokensForFullSystemMessage: true, + disableNonFinalFinalMessages: true, + useNonFinalFinalMessage: true + } + }, + {}, + {additionalRenderParameters: jinjaParameters} + ], + [ + { + _jinjaFlags: { + emptyLastModelResponseIsFinalMessage: true, + useSpecialTokensForFullSystemMessage: true, + useNonFinalFinalMessage: true + } + }, + {}, + {additionalRenderParameters: jinjaParameters} + ], + [ + { + _jinjaFlags: { + emptyLastModelResponseIsFinalMessage: true, + useSpecialTokensForFullSystemMessage: true, + useNonFinalFinalMessage: true, + noFinalMessages: true + } + }, + {}, + {additionalRenderParameters: jinjaParameters} + ], + [ + { + _jinjaFlags: { + emptyLastModelResponseIsFinalMessage: true, + useSpecialTokensForFullSystemMessage: true, + useNonFinalFinalMessage: true, + noFinalMessages: true, + formatting: 1 + } + }, + {}, + {additionalRenderParameters: jinjaParameters} + ], + + [{todayDate: null}, {}, {}], + [{cuttingKnowledgeDate: null}, {}, {}], + [{reasoningEffort: null}, {}, {}], + [{todayDate: null, cuttingKnowledgeDate: null}, {}, {}], + [{todayDate: null, cuttingKnowledgeDate: null, reasoningEffort: null}, {}, {}] + ]; + } +} + +function formatDate(date: Date, timezone?: "UTC") { + const day = date.toLocaleDateString("en-US", {day: "numeric", timeZone: timezone}).padStart(2, "0"); + const month = date.toLocaleDateString("en-US", {month: "numeric", timeZone: timezone}).padStart(2, "0"); + const year = date.toLocaleDateString("en-US", {year: "numeric", timeZone: timezone}).padStart(4, "0"); + return `${year}-${month}-${day}`; +} + +type JinjaMatchFlags = { + emptyLastModelResponseIsFinalMessage?: boolean, + useSpecialTokensForFullSystemMessage?: boolean, + disableNonFinalFinalMessages?: boolean, + useNonFinalFinalMessage?: boolean, + noFinalMessages?: boolean, + formatting?: 1 +}; diff --git a/src/chatWrappers/utils/resolveChatWrapper.ts b/src/chatWrappers/utils/resolveChatWrapper.ts index 9feb89fc..abc25c46 100644 --- a/src/chatWrappers/utils/resolveChatWrapper.ts +++ b/src/chatWrappers/utils/resolveChatWrapper.ts @@ -18,6 +18,7 @@ import {Tokenizer} from "../../types.js"; import {includesText} from "../../utils/includesText.js"; import {LlamaModel} from "../../evaluator/LlamaModel/LlamaModel.js"; import {QwenChatWrapper} from "../QwenChatWrapper.js"; +import 
{HarmonyChatWrapper} from "../HarmonyChatWrapper.js"; import {isJinjaTemplateEquivalentToSpecializedChatWrapper} from "./isJinjaTemplateEquivalentToSpecializedChatWrapper.js"; import {getModelLinageNames} from "./getModelLinageNames.js"; import type {GgufFileInfo} from "../../gguf/types/GgufFileInfoTypes.js"; @@ -25,7 +26,7 @@ import type {GgufFileInfo} from "../../gguf/types/GgufFileInfoTypes.js"; export const specializedChatWrapperTypeNames = Object.freeze([ "general", "deepSeek", "qwen", "llama3.2-lightweight", "llama3.1", "llama3", "llama2Chat", "mistral", "alpacaChat", "functionary", - "chatML", "falconChat", "gemma" + "chatML", "falconChat", "gemma", "harmony" ] as const); export type SpecializedChatWrapperTypeName = (typeof specializedChatWrapperTypeNames)[number]; @@ -55,6 +56,7 @@ export const chatWrappers = Object.freeze({ "chatML": ChatMLChatWrapper, "falconChat": FalconChatWrapper, "gemma": GemmaChatWrapper, + "harmony": HarmonyChatWrapper, "template": TemplateChatWrapper, "jinjaTemplate": JinjaTemplateChatWrapper } as const satisfies Record); @@ -65,6 +67,10 @@ const chatWrapperToConfigType = new Map( )) ); +const specializedChatWrapperRelatedTexts = { + "harmony": ["gpt", "gpt-oss"] +} satisfies Partial>; + export type BuiltInChatWrapperType = InstanceType; export type ResolveChatWrapperOptions = { @@ -358,12 +364,16 @@ export function resolveChatWrapper( return createSpecializedChatWrapper(MistralChatWrapper); else if (includesText(modelNames, ["Gemma", "Gemma 2"])) return createSpecializedChatWrapper(GemmaChatWrapper); + else if (includesText(modelNames, ["gpt-oss", "Gpt Oss", "Gpt-Oss", "openai_gpt-oss", "Openai_Gpt Oss", "openai.gpt-oss", "Openai.Gpt Oss"])) + return createSpecializedChatWrapper(HarmonyChatWrapper); } // try to find a pattern in the Jinja template to resolve to a specialized chat wrapper, // with a logic similar to `llama.cpp`'s `llama_chat_apply_template_internal` function if (modelJinjaTemplate != null && modelJinjaTemplate.trim() !== "") { - if (modelJinjaTemplate.includes("<|im_start|>")) + if (modelJinjaTemplate.includes("<|start|>") && modelJinjaTemplate.includes("<|channel|>")) + return createSpecializedChatWrapper(HarmonyChatWrapper); + else if (modelJinjaTemplate.includes("<|im_start|>")) return createSpecializedChatWrapper(ChatMLChatWrapper); else if (modelJinjaTemplate.includes("[INST]")) return createSpecializedChatWrapper(Llama2ChatWrapper, { @@ -479,6 +489,14 @@ function orderChatWrapperNamesByAssumedCompatibilityWithModel getPointsForTextMatch(pattern, fullText, existsPoints, positionPoints)) + .reduce((res, item) => Math.max(res, item), 0); + } + const modelName = fileInfo?.metadata?.general?.name; return chatWrapperNames @@ -487,11 +505,11 @@ function orderChatWrapperNamesByAssumedCompatibilityWithModel = { .option("batchSize", { alias: "b", type: "number", - description: "Batch size to use for the model context. The default value is the context size" + description: "Batch size to use for the model context" }) .option("flashAttention", { alias: "fa", diff --git a/src/cli/commands/CompleteCommand.ts b/src/cli/commands/CompleteCommand.ts index 55cc01b1..d4d12c7e 100644 --- a/src/cli/commands/CompleteCommand.ts +++ b/src/cli/commands/CompleteCommand.ts @@ -114,7 +114,7 @@ export const CompleteCommand: CommandModule = { .option("batchSize", { alias: "b", type: "number", - description: "Batch size to use for the model context. 
The default value is the context size" + description: "Batch size to use for the model context" }) .option("flashAttention", { alias: "fa", diff --git a/src/cli/commands/InfillCommand.ts b/src/cli/commands/InfillCommand.ts index b07f1e59..934057c3 100644 --- a/src/cli/commands/InfillCommand.ts +++ b/src/cli/commands/InfillCommand.ts @@ -124,7 +124,7 @@ export const InfillCommand: CommandModule = { .option("batchSize", { alias: "b", type: "number", - description: "Batch size to use for the model context. The default value is the context size" + description: "Batch size to use for the model context" }) .option("flashAttention", { alias: "fa", diff --git a/src/cli/commands/inspect/commands/InspectMeasureCommand.ts b/src/cli/commands/inspect/commands/InspectMeasureCommand.ts index 7bbc756a..7e19aa96 100644 --- a/src/cli/commands/inspect/commands/InspectMeasureCommand.ts +++ b/src/cli/commands/inspect/commands/InspectMeasureCommand.ts @@ -32,6 +32,7 @@ type InspectMeasureCommand = { maxContextSize?: number, flashAttention?: boolean, swaFullCache?: boolean, + batchSize?: number, measures: number, memory: "vram" | "ram" | "all", noMmap: boolean, @@ -111,6 +112,11 @@ export const InspectMeasureCommand: CommandModule default: false, description: "Disable SWA (Sliding Window Attention) on supported models" }) + .option("batchSize", { + alias: "b", + type: "number", + description: "Batch size to use for the model context" + }) .option("measures", { alias: "n", type: "number", @@ -148,7 +154,7 @@ export const InspectMeasureCommand: CommandModule }, async handler({ modelPath: ggufPath, header: headerArg, gpu, minLayers, maxLayers, minContextSize, maxContextSize, flashAttention, swaFullCache, - measures = 10, memory: measureMemoryType, noMmap, printHeaderBeforeEachLayer = true, evaluateText, repeatEvaluateText + batchSize, measures = 10, memory: measureMemoryType, noMmap, printHeaderBeforeEachLayer = true, evaluateText, repeatEvaluateText }: InspectMeasureCommand) { if (maxLayers === -1) maxLayers = undefined; if (maxContextSize === -1) maxContextSize = undefined; @@ -224,6 +230,7 @@ export const InspectMeasureCommand: CommandModule minContextSize, flashAttention, swaFullCache, + batchSize, tests: measures, evaluateText: evaluateText == null ? 
undefined @@ -295,7 +302,8 @@ export const InspectMeasureCommand: CommandModule contextSize: previousContextSizeCheck, modelGpuLayers: lastGpuLayers, flashAttention, - swaFullCache + swaFullCache, + batchSize }); const contextVramEstimation = contextResourceEstimation?.gpuVram; @@ -505,7 +513,7 @@ const expectedFileName = "InspectMeasureCommand"; async function measureModel({ modelPath, useMmap, gpu, tests, initialMaxContextSize, maxContextSize, minContextSize, maxGpuLayers, minGpuLayers, flashAttention, - swaFullCache, evaluateText, exitAfterMeasurement = false, onInfo + swaFullCache, batchSize, evaluateText, exitAfterMeasurement = false, onInfo }: { modelPath: string, useMmap?: boolean, @@ -518,6 +526,7 @@ async function measureModel({ minGpuLayers?: number, flashAttention?: boolean, swaFullCache?: boolean, + batchSize?: number, evaluateText?: string, exitAfterMeasurement?: boolean, onInfo(data: { @@ -626,6 +635,7 @@ async function measureModel({ minGpuLayers, flashAttention, swaFullCache, + batchSize, evaluateText, exitAfterMeasurement } satisfies ParentToChildMessage); @@ -728,10 +738,10 @@ async function runTestWorkerLogic() { async function testContextSizes({ model, modelVramUsage, modelRamUsage, startContextSize, maxContextSize, minContextSize, tests, flashAttention, swaFullCache, - evaluateText, exitAfterMeasurement = false + batchSize, evaluateText, exitAfterMeasurement = false }: { model: LlamaModel, modelVramUsage: number, modelRamUsage: number, startContextSize?: number, maxContextSize?: number, - minContextSize?: number, tests: number, flashAttention?: boolean, swaFullCache?: boolean, evaluateText?: string, + minContextSize?: number, tests: number, flashAttention?: boolean, swaFullCache?: boolean, batchSize?: number, evaluateText?: string, exitAfterMeasurement?: boolean }) { let measurementsDone: number = 0; @@ -763,6 +773,7 @@ async function runTestWorkerLogic() { ignoreMemorySafetyChecks: currentContextSizeCheck != null, flashAttention, swaFullCache, + batchSize, failedCreationRemedy: false }); @@ -816,11 +827,12 @@ async function runTestWorkerLogic() { } async function testWithGpuLayers({ - modelPath, useMmap, gpuLayers, tests, startContextSize, maxContextSize, minContextSize, flashAttention, swaFullCache, evaluateText, - exitAfterMeasurement = false + modelPath, useMmap, gpuLayers, tests, startContextSize, maxContextSize, minContextSize, flashAttention, swaFullCache, batchSize, + evaluateText, exitAfterMeasurement = false }: { modelPath: string, useMmap?: boolean, gpuLayers: number, tests: number, startContextSize?: number, maxContextSize?: number, - minContextSize?: number, flashAttention?: boolean, swaFullCache?: boolean, evaluateText?: string, exitAfterMeasurement?: boolean + minContextSize?: number, flashAttention?: boolean, swaFullCache?: boolean, batchSize?: number, evaluateText?: string, + exitAfterMeasurement?: boolean }) { try { const preModelVramUsage = (await llama.getVramState()).used; @@ -854,6 +866,7 @@ async function runTestWorkerLogic() { minContextSize, flashAttention, swaFullCache, + batchSize, tests, evaluateText, exitAfterMeasurement @@ -903,6 +916,7 @@ async function runTestWorkerLogic() { minContextSize: message.minContextSize, flashAttention: message.flashAttention, swaFullCache: message.swaFullCache, + batchSize: message.batchSize, evaluateText: message.evaluateText, exitAfterMeasurement: message.exitAfterMeasurement }); @@ -993,6 +1007,7 @@ type ParentToChildMessage = { minGpuLayers?: number, flashAttention?: boolean, swaFullCache?: boolean, + 
batchSize?: number, initialMaxContextSize?: number, maxContextSize?: number, minContextSize?: number, diff --git a/src/evaluator/LlamaChat/LlamaChat.ts b/src/evaluator/LlamaChat/LlamaChat.ts index ddbfbcec..ba81335b 100644 --- a/src/evaluator/LlamaChat/LlamaChat.ts +++ b/src/evaluator/LlamaChat/LlamaChat.ts @@ -2,8 +2,8 @@ import {DisposeAggregator, DisposedError, EventRelay, withLock} from "lifecycle- import {ChatWrapper} from "../../ChatWrapper.js"; import {LlamaContextSequence} from "../LlamaContext/LlamaContext.js"; import { - ChatHistoryItem, ChatModelFunctions, ChatModelResponse, ChatModelSegmentType, ChatUserMessage, - isChatModelResponseFunctionCall, isChatModelResponseSegment, LLamaContextualRepeatPenalty, Token, Tokenizer, allSegmentTypes + ChatHistoryItem, ChatModelFunctions, ChatModelResponse, ChatModelSegmentType, ChatUserMessage, isChatModelResponseFunctionCall, + isChatModelResponseSegment, LLamaContextualRepeatPenalty, Token, Tokenizer, allSegmentTypes, ChatWrapperGeneratedContextState } from "../../types.js"; import {GbnfJsonSchemaToType} from "../../utils/gbnfJson/types.js"; import {LlamaGrammar} from "../LlamaGrammar.js"; @@ -621,8 +621,12 @@ export class LlamaChat { while (true) { generateResponseState.startTokenLoop(); + generateResponseState.handleRerender(); + const shouldHandlePrefixTriggers = generateResponseState.isRerender; + generateResponseState.canAvoidReloadingHistory = false; await loadContextWindow(); + generateResponseState.isRerender = false; generateResponseState.addStopGenerationTriggersFromChatWrapper(); @@ -634,6 +638,14 @@ export class LlamaChat { } } + if (shouldHandlePrefixTriggers) { + const handlePrefixTriggersRes = await generateResponseState.handlePrefixTriggers( + loadContextWindowForFunctionCallingLoop + ); + if (handlePrefixTriggersRes != null) + return handlePrefixTriggersRes; + } + if (generateResponseState.functionEvaluationMode !== false) { const functionsCallsRes = await generateResponseState.enterFunctionCallingLoop( loadContextWindowForFunctionCallingLoop @@ -680,7 +692,7 @@ export class LlamaChat { if (maxTokensTriggerRes != null) return maxTokensTriggerRes; - if (generateResponseState.updateShouldContextShift()) + if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift()) break; if (await generateResponseState.handleBudgetTriggers()) { @@ -689,7 +701,7 @@ export class LlamaChat { await generateResponseState.createNewEvaluationIterator(); } - if (generateResponseState.updateShouldContextShift()) + if (generateResponseState.handleShouldRerender() || generateResponseState.updateShouldContextShift()) break; const abortRes = generateResponseState.handleAbortTrigger("model"); @@ -699,7 +711,7 @@ export class LlamaChat { generateResponseState.isFirstEvaluation = false; - if (generateResponseState.shouldContextShift) + if (generateResponseState.shouldRerender || generateResponseState.shouldContextShift) continue; break; @@ -820,6 +832,7 @@ export class LlamaChat { ), true ); + generateResponseState.isRerender = false; generateResponseState.functionEvaluationMode = false; generateResponseState.addStopGenerationTriggersFromChatWrapper(); @@ -829,6 +842,12 @@ export class LlamaChat { StopGenerationDetector.resolveLlamaTextTrigger(userTextSuffix, this.model.tokenizer) ); + generateResponseState.rerenderTriggers.forEach((trigger) => ( + generateResponseState.stopGenerationDetector.addStopTrigger( + StopGenerationDetector.resolveLlamaTextTrigger(trigger, this.model.tokenizer) + ) + )); + allSegmentTypes 
.map((segmentType) => getChatWrapperSegmentDefinition(this._chatWrapper.settings, segmentType)) .filter((segmentDefinition) => segmentDefinition != null) @@ -1334,12 +1353,12 @@ function generateContextTextThatEndsWithUserText( async function getContextWindow({ resolvedHistory, resolvedContextShift, - lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, + lastHistoryCompressionMetadata, pendingTokensCount = 0, isFirstEvaluation, isRerender, chatWrapper, lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift, sequence, minFreeContextTokens = 1, functions, documentFunctionParams, endWithUserText }: { resolvedHistory: ChatHistoryItem[], resolvedContextShift: Required, - lastHistoryCompressionMetadata: object | null | undefined, pendingTokensCount: number, isFirstEvaluation: boolean, + lastHistoryCompressionMetadata: object | null | undefined, pendingTokensCount: number, isFirstEvaluation: boolean, isRerender: boolean, chatWrapper: ChatWrapper, lastEvaluationContextWindowHistory?: ChatHistoryItem[], minimumOverlapPercentageToPreventContextShift: number, sequence?: LlamaContextSequence, minFreeContextTokens?: number, functions?: ChatModelFunctions, documentFunctionParams?: boolean, endWithUserText: boolean @@ -1347,7 +1366,11 @@ async function getContextWindow({ history: ChatHistoryItem[], stopGenerationTriggers: LlamaText[], tokens: Token[], removeRawFromHistory: boolean, newHistoryCompressionMetadata: object | null | undefined, ignoreStartText: LlamaText[], functionCallInitiallyEngaged: boolean, - disengageInitiallyEngagedFunctionCall: LlamaText[], userTextSuffix?: LlamaText + disengageInitiallyEngagedFunctionCall: LlamaText[], userTextSuffix?: LlamaText, + prefixTriggers: ChatWrapperGeneratedContextState["prefixTriggers"], + noPrefixTrigger: ChatWrapperGeneratedContextState["noPrefixTrigger"], + rerender: ChatWrapperGeneratedContextState["rerender"], + detectFunctionCalls: ChatWrapperGeneratedContextState["detectFunctionCalls"] }> { if (sequence == null) throw new DisposedError(); @@ -1356,7 +1379,7 @@ async function getContextWindow({ const context = sequence.context; let removeRawFromHistory = false; - if (isFirstEvaluation && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) { + if ((isFirstEvaluation || isRerender) && lastEvaluationContextWindowHistory != null && sequence.isLoadedToMemory) { const newContextWindow = lastEvaluationContextWindowHistory.slice(); if (endWithUserText) { @@ -1371,7 +1394,10 @@ async function getContextWindow({ response: [] }); - const {contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix} = generateContextText( + const { + contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, + prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls + } = generateContextText( endWithUserText, chatWrapper, { @@ -1386,7 +1412,7 @@ async function getContextWindow({ const existingEvaluationPercentage = firstDifferentIndex / tokens.length; - if (existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift) + if (isRerender || existingEvaluationPercentage >= minimumOverlapPercentageToPreventContextShift) return { history: newContextWindow, stopGenerationTriggers, @@ -1396,7 +1422,11 @@ async function getContextWindow({ ignoreStartText: ignoreStartText ?? [], functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false, disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? 
[], - userTextSuffix + userTextSuffix, + prefixTriggers, + noPrefixTrigger, + rerender, + detectFunctionCalls }; } } @@ -1426,7 +1456,10 @@ async function getContextWindow({ documentFunctionParams }); - const {contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix} = generateContextText( + const { + contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, + prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls + } = generateContextText( endWithUserText, chatWrapper, { @@ -1445,12 +1478,19 @@ async function getContextWindow({ ignoreStartText: ignoreStartText ?? [], functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false, disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [], - userTextSuffix + userTextSuffix, + prefixTriggers, + noPrefixTrigger, + rerender, + detectFunctionCalls }; } { - const {contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix} = generateContextText( + const { + contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, + prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls + } = generateContextText( endWithUserText, chatWrapper, { @@ -1471,7 +1511,11 @@ async function getContextWindow({ ignoreStartText: ignoreStartText ?? [], functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false, disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? [], - userTextSuffix + userTextSuffix, + prefixTriggers, + noPrefixTrigger, + rerender, + detectFunctionCalls }; } @@ -1502,7 +1546,10 @@ async function getContextWindow({ documentFunctionParams }); - const {contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix} = generateContextText( + const { + contextText, stopGenerationTriggers, ignoreStartText, functionCall, userTextSuffix, + prefixTriggers, noPrefixTrigger, rerender, detectFunctionCalls + } = generateContextText( endWithUserText, chatWrapper, { @@ -1521,7 +1568,11 @@ async function getContextWindow({ ignoreStartText: ignoreStartText ?? [], functionCallInitiallyEngaged: functionCall?.initiallyEngaged ?? false, disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? 
[], - userTextSuffix + userTextSuffix, + prefixTriggers, + noPrefixTrigger, + rerender, + detectFunctionCalls }; } @@ -1565,6 +1616,7 @@ class GenerateResponseState>; private functionsGrammar?: FunctionCallNameGrammar> | FunctionCallParamsGrammar>; private functionsEvaluationState: LlamaGrammarEvaluationState | undefined; + public functionSyntaxStartDetectorEnabled: boolean = true; private readonly streamRegulator = new TokenStreamRegulator(); public readonly stopGenerationDetector = new StopGenerationDetector(); @@ -1580,6 +1632,7 @@ class GenerateResponseState[number] + }> = new Map(); + public noPrefixTrigger: ChatWrapperGeneratedContextState["noPrefixTrigger"] = undefined; + public rerenderTriggers: LlamaText[] = []; + public rerenderTriggerDetector: StopGenerationDetector = new StopGenerationDetector(); + public rerenderActions: Exclude["action"] = undefined; + public tokens: Token[] = []; // token evaluation loop @@ -1880,7 +1944,8 @@ class GenerateResponseState prefixDetector.addStopTrigger(stopTrigger)); + + this.prefixTriggerDetectors.set(prefixDetector, {inject: trigger.inject, trigger}); + + const inject = trigger.inject; + if (inject != null && inject.values.length > 0) { + const fullPrefixDetector = new StopGenerationDetector(); + StopGenerationDetector + .resolveStopTriggers( + trigger.triggers.map((trigger) => LlamaText([trigger, inject])), + this.llamaChat.model.tokenizer + ) + .forEach((stopTrigger) => fullPrefixDetector.addStopTrigger(stopTrigger)); + + this.prefixTriggerDetectors.set(fullPrefixDetector, {trigger}); + } + } + + this.noPrefixTrigger = noPrefixTrigger; + if (this.noPrefixTrigger?.type === "functionCall" && !this.functionsEnabled) + this.noPrefixTrigger = undefined; + + this.rerenderTriggers = rerender?.triggers ?? []; + this.rerenderTriggerDetector.clearInProgressStops(); + this.rerenderTriggerDetector.clearTriggeredStops(); + this.rerenderTriggerDetector = new StopGenerationDetector(); + this.rerenderActions = rerender?.action; + + this.functionSyntaxStartDetectorEnabled = detectFunctionCalls ?? 
true; + if (!this.functionSyntaxStartDetectorEnabled) + this.functionSyntaxStartDetector.clearInProgressStops(); + + if (rerender?.triggers != null) { + StopGenerationDetector.resolveStopTriggers(rerender.triggers, this.llamaChat.model.tokenizer) + .map((stopTrigger) => this.rerenderTriggerDetector.addStopTrigger(stopTrigger)); + } + } + this.lastHistoryCompressionMetadata = newHistoryCompressionMetadata; this.lastContextWindowHistory = contextWindowHistory; this.segmentHandler.resetContextWindow(); @@ -2043,6 +2178,7 @@ class GenerateResponseState Promise) { + const reloadTokens = async () => { + this.startTokenLoop(); + await loadContextWindow(); + }; + const injectTokens = async (text?: LlamaText, alignStateTokens: boolean = false) => { + if (text == null) + return; + + const tokens = text.tokenize(this.llamaChat.model.tokenizer, "trimLeadingSpace"); + if (tokens.length === 0) + return; + + pushAll(this.prefixTriggerTokens, tokens); + + if (alignStateTokens) + await reloadTokens(); + }; + + if (this.prefixTriggerDetectors.size === 0) { + if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) { + await injectTokens(this.noPrefixTrigger.inject, true); + + this.functionEvaluationMode = "functionName"; + } else if (this.noPrefixTrigger?.type === "segment") { + await injectTokens(this.noPrefixTrigger.inject, true); + + this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType); + } else if (this.noPrefixTrigger?.type === "response") + await injectTokens(this.noPrefixTrigger.inject, true); + + return undefined; + } + + const generatedTokens: Token[] = []; + let isFirstToken = true; + let continueGeneration = true; + + for await (const tokens of this.evaluateWithContextShift(loadContextWindow)) { + pushAll(generatedTokens, tokens); + + for (const [triggerDetector, {trigger, inject}] of [...this.prefixTriggerDetectors.entries()]) { + triggerDetector.recordGeneration({ + text: this.currentText, + tokens: this.currentTokens, + startNewChecks: isFirstToken, + triggerMustStartWithGeneration: true + }); + + if (triggerDetector.hasTriggeredStops) { + const { + firstRemainingGenerationAfterStop, + stopTrigger + } = StopGenerationDetector.getFirstRemainingGenerationAfterStop(triggerDetector.getTriggeredStops()); + const remainingTokens = typeof firstRemainingGenerationAfterStop === "string" + ? firstRemainingGenerationAfterStop === "" + ? [] + : this.llamaChat.model.tokenize(firstRemainingGenerationAfterStop, false, "trimLeadingSpace") + : (firstRemainingGenerationAfterStop ?? []); + const triggerTokens = (stopTrigger == null || remainingTokens.length === 0) + ? 
generatedTokens + : stopTrigger.flatMap((item) => { + if (typeof item === "string") + return this.llamaChat.model.tokenize(item, false, "trimLeadingSpace"); + + return [item]; + }); + + this.streamRegulator.reset(); + + if (trigger.type === "segment") { + pushAll(this.prefixTriggerTokens, triggerTokens); + if (inject != null) + await injectTokens(inject); + + await reloadTokens(); + this.segmentHandler.openSegment(trigger.segmentType); + } else if (trigger.type === "response") { + pushAll(this.prefixTriggerTokens, triggerTokens); + + if (inject != null) + await injectTokens(inject); + + await reloadTokens(); + } else if (trigger.type === "functionCall") { + if (trigger.replaceTrigger === false) + pushAll(this.prefixTriggerTokens, triggerTokens); + + if (inject != null) + await injectTokens(inject); + + await reloadTokens(); + this.functionEvaluationMode = "functionName"; + } else + void (trigger satisfies never); + + this.prefixTriggerDetectors.clear(); + continueGeneration = false; + break; + } else if (!triggerDetector.hasInProgressStops) + this.prefixTriggerDetectors.delete(triggerDetector); + } + + if (this.prefixTriggerDetectors.size === 0 && continueGeneration) { + this.streamRegulator.reset(); + continueGeneration = false; + + if (this.noPrefixTrigger?.type === "functionCall" && this.chatWrapper.settings.functions != null) { + await injectTokens(this.noPrefixTrigger.inject, true); + + this.functionEvaluationMode = "functionName"; + } else if (this.noPrefixTrigger?.type === "segment") { + await injectTokens(this.noPrefixTrigger.inject, true); + + this.segmentHandler.openSegment(this.noPrefixTrigger.segmentType); + } else if (this.noPrefixTrigger?.type === "response") + await injectTokens(this.noPrefixTrigger.inject, true); + else + this.streamRegulator.addChunk({ + tokens: generatedTokens, + text: this.llamaChat.model.detokenize(generatedTokens, false, this.getLastTokens()) + }); + } + + + isFirstToken = false; + + if (!continueGeneration) + break; + + const stopRes = this.handleAbortTrigger("model") ?? this.handleMaxTokensTrigger("model"); + if (stopRes != null) + return stopRes; + } + + return undefined; + } + public async enterFunctionCallingLoop(loadContextWindow: () => Promise) { if (!this.functionsEnabled) { this.functionEvaluationMode = false; @@ -2721,6 +2995,9 @@ class GenerateResponseState= this.llamaChat.context.contextSize - 1; return this.shouldContextShift; @@ -3159,6 +3448,10 @@ class SegmentHandler { const s1MB = Math.pow(1024, 2); const tensorInfo = this._ggufFileInfo.fullTensorInfo ?? []; + const expertCount = llmData?.expert_count ?? 0; + const headCount = llmData?.attention?.head_count ?? 0; + const embeddingLength = llmData?.embedding_length ?? 0; let defaultCalculationAdjustment = 0; @@ -237,10 +244,6 @@ export class GgufInsights { return 0; if (this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.llama) { - const expertCount = this._ggufFileInfo.architectureMetadata.expert_count ?? 0; - const headCount = this._ggufFileInfo.architectureMetadata.attention?.head_count ?? 0; - const embeddingLength = llmData.embedding_length ?? 0; - if (expertCount > 0) { const expertsUsedCount = this._ggufFileInfo.architectureMetadata.expert_used_count ?? 2; @@ -307,6 +310,10 @@ export class GgufInsights { // ) // ); // } + } else if (expertCount > 0) { + const expertsUsedCount = this._ggufFileInfo.architectureMetadata.expert_used_count ?? 
2; + + return int32TBytes * batchSize * (((expertsUsedCount + 1) * embeddingLength) + (kvSize * headCount)); } const totalElements = tensorInfo.length === 0 @@ -764,8 +771,16 @@ function getSwaPatternForArchitecture(architecture?: GgufArchitectureType): numb return 2; case GgufArchitectureType.gemma3: return 6; + case GgufArchitectureType.gemma3n: + return 5; case GgufArchitectureType.cohere2: return 4; + case GgufArchitectureType.exaone4: + return 4; + case GgufArchitectureType.gptOss: + return 2; + case GgufArchitectureType.smallthinker: + return 4; } return 1; diff --git a/src/gguf/types/GgufMetadataTypes.ts b/src/gguf/types/GgufMetadataTypes.ts index a36948fa..3058aa98 100644 --- a/src/gguf/types/GgufMetadataTypes.ts +++ b/src/gguf/types/GgufMetadataTypes.ts @@ -158,7 +158,8 @@ export enum GgufFileType { MOSTLY_Q4_0_4_8 = 34, // deprecated MOSTLY_Q4_0_8_8 = 35, // deprecated MOSTLY_TQ1_0 = 36, - MOSTLY_TQ2_0 = 37 + MOSTLY_TQ2_0 = 37, + MOSTLY_MXFP4_MOE = 38 } diff --git a/src/gguf/types/GgufTensorInfoTypes.ts b/src/gguf/types/GgufTensorInfoTypes.ts index 8b7f615a..ed750329 100644 --- a/src/gguf/types/GgufTensorInfoTypes.ts +++ b/src/gguf/types/GgufTensorInfoTypes.ts @@ -59,5 +59,6 @@ export const enum GgmlType { TQ2_0 = 35, IQ4_NL_4_4 = 36, IQ4_NL_4_8 = 37, - IQ4_NL_8_8 = 38 + IQ4_NL_8_8 = 38, + MXFP4 = 39 // MXFP4 (1 block) } diff --git a/src/gguf/utils/ggufQuantNames.ts b/src/gguf/utils/ggufQuantNames.ts index 4beddbd4..abff8a8f 100644 --- a/src/gguf/utils/ggufQuantNames.ts +++ b/src/gguf/utils/ggufQuantNames.ts @@ -3,6 +3,7 @@ import {GgufFileType} from "../types/GgufMetadataTypes.js"; export const ggufQuantNames = new Map([ ["Q4_0", GgufFileType.MOSTLY_Q4_0], ["Q4_1", GgufFileType.MOSTLY_Q4_1], + ["MXFP4", GgufFileType.MOSTLY_MXFP4_MOE], ["Q5_0", GgufFileType.MOSTLY_Q5_0], ["Q5_1", GgufFileType.MOSTLY_Q5_1], ["IQ2_XXS", GgufFileType.MOSTLY_IQ2_XXS], diff --git a/src/index.ts b/src/index.ts index 2332caaa..ffd02c80 100644 --- a/src/index.ts +++ b/src/index.ts @@ -60,6 +60,7 @@ import {FalconChatWrapper} from "./chatWrappers/FalconChatWrapper.js"; import {AlpacaChatWrapper} from "./chatWrappers/AlpacaChatWrapper.js"; import {FunctionaryChatWrapper} from "./chatWrappers/FunctionaryChatWrapper.js"; import {GemmaChatWrapper} from "./chatWrappers/GemmaChatWrapper.js"; +import {HarmonyChatWrapper} from "./chatWrappers/HarmonyChatWrapper.js"; import {TemplateChatWrapper, type TemplateChatWrapperOptions} from "./chatWrappers/generic/TemplateChatWrapper.js"; import { JinjaTemplateChatWrapper, type JinjaTemplateChatWrapperOptions, type JinjaTemplateChatWrapperOptionsConvertMessageFormat @@ -95,7 +96,8 @@ import { type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, isChatModelResponseSegment, type LLamaContextualRepeatPenalty, type ChatWrapperSettings, type ChatWrapperSettingsSegment, - type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState, type ChatWrapperGenerateInitialHistoryOptions + type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState, type ChatWrapperGeneratedPrefixTriggersContextState, + type ChatWrapperGeneratedInitiallyEngagedFunctionsContextState, type ChatWrapperGenerateInitialHistoryOptions } from "./types.js"; import { type GbnfJsonArraySchema, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonStringSchema, @@ -205,6 +207,8 @@ 
export {
     type ChatWrapperSettingsSegment,
     type ChatWrapperGenerateContextStateOptions,
     type ChatWrapperGeneratedContextState,
+    type ChatWrapperGeneratedPrefixTriggersContextState,
+    type ChatWrapperGeneratedInitiallyEngagedFunctionsContextState,
     type ChatWrapperGenerateInitialHistoryOptions,
     EmptyChatWrapper,
     DeepSeekChatWrapper,
@@ -220,6 +224,7 @@ export {
     AlpacaChatWrapper,
     FunctionaryChatWrapper,
     GemmaChatWrapper,
+    HarmonyChatWrapper,
     TemplateChatWrapper,
     type TemplateChatWrapperOptions,
     JinjaTemplateChatWrapper,
diff --git a/src/types.ts b/src/types.ts
index 488cb217..85940365 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -105,7 +105,14 @@ export type ChatWrapperSettings = {
         /** Chain of Thought text segment */
         readonly thought?: ChatWrapperSettingsSegment & {
             reopenAfterFunctionCalls?: boolean
-        }
+        },
+
+        /**
+         * Comment segment.
+         *
+         * Used by models such as gpt-oss.
+         */
+        readonly comment?: ChatWrapperSettingsSegment
     }
 };
 export type ChatWrapperSettingsSegment = {
@@ -124,14 +131,172 @@ export type ChatWrapperCheckModelCompatibilityParams = {
     fileInfo?: GgufFileInfo
 };
 
-export type ChatWrapperGeneratedContextState = {
+export type ChatWrapperGeneratedContextState =
+    ChatWrapperGeneratedPrefixTriggersContextState | ChatWrapperGeneratedInitiallyEngagedFunctionsContextState;
+
+export type ChatWrapperGeneratedPrefixTriggersContextState = {
+    /**
+     * The rendered chat to load into the context sequence state
+     */
+    contextText: LlamaText,
+
+    /**
+     * Triggers to stop the generation
+     */
+    stopGenerationTriggers: LlamaText[],
+
+    /**
+     * When this option is set, after evaluating the `contextText`,
+     * it'll look for one of the triggers to be the first generated output.
+     *
+     * When a trigger is matched, its type determines what happens next: entering function calling mode,
+     * opening a segment, or continuing the generation as a textual output.
+     *
+     * If none of the triggers are matched, the `noPrefixTrigger` will take effect.
+     */
+    prefixTriggers?: Array<{
+        triggers: LlamaText[],
+
+        /**
+         * Enter into function calling mode.
+         *
+         * Entering this mode will put the function calling prefix into the context sequence state
+         * and force the model to choose a function to call.
+         *
+         * If no functions are available, this trigger will be ignored.
+         */
+        type: "functionCall",
+
+        /**
+         * Remove the trigger tokens and replace them with the function call prefix.
+         *
+         * Defaults to `true`.
+         */
+        replaceTrigger?: boolean,
+
+        /**
+         * Text to inject into the context sequence state when this trigger is matched.
+         */
+        inject?: LlamaText
+    } | {
+        triggers: LlamaText[],
+
+        /**
+         * Open a segment of the specified type.
+         */
+        type: "segment",
+
+        /**
+         * Type of the segment to open.
+         */
+        segmentType: ChatModelSegmentType,
+
+        /**
+         * Text to inject into the context sequence state when this trigger is matched.
+         */
+        inject?: LlamaText
+    } | {
+        triggers: LlamaText[],
+
+        /**
+         * Continue the generation as a textual output.
+         */
+        type: "response",
+
+        /**
+         * Text to inject into the context sequence state when this trigger is matched.
+         */
+        inject?: LlamaText
+    }>,
+
+    /**
+     * When no prefix triggers are matched, or none are provided, the action specified
+     * by this option will be performed after evaluating the `contextText`.
+     */
+    noPrefixTrigger?: {
+        /**
+         * Enter into function calling mode.
+         *
+         * Entering this mode will put the function calling prefix into the context sequence state
+         * and force the model to choose a function to call.
+         *
+         * If no functions are available, this action will be ignored.
+         */
+        type: "functionCall",
+
+        /**
+         * Text to inject into the context sequence state when this action is performed.
+         */
+        inject: LlamaText
+    } | {
+        /**
+         * Open a segment of the specified type.
+         */
+        type: "segment",
+
+        /**
+         * Type of the segment to open.
+         */
+        segmentType: ChatModelSegmentType,
+
+        /**
+         * Text to inject into the context sequence state when this action is performed.
+         */
+        inject: LlamaText
+    } | {
+        /**
+         * Continue the generation as a textual output.
+         */
+        type: "response",
+
+        /**
+         * Text to inject into the context sequence state when this action is performed.
+         */
+        inject: LlamaText
+    },
+
+    /**
+     * Trigger a rerender of the chat template when any of the provided triggers are matched.
+     *
+     * When a rerender is triggered, the chat template will be rendered again and the newly returned trigger options will come into effect,
+     * so if no prefix triggers are needed after the rerender, make sure not to provide any.
+     *
+     * When a rerender is triggered, the `action` will be performed.
+     */
+    rerender?: {
+        triggers: LlamaText[],
+
+        /**
+         * Action to perform when the rerender is triggered.
+         *
+         * - **`"closeResponseItem"`**: Close the current segment or stop the textual response generation.
+         */
+        action?: "closeResponseItem"
+    },
+
+    /**
+     * Whether to detect the function calling prefix syntax in the current text generation to dynamically enter into function calling mode.
+     *
+     * If function calling should only be entered via a prefix trigger, set this option to `false`.
+     */
+    detectFunctionCalls?: boolean,
+
+    ignoreStartText?: never,
+    functionCall?: never
+};
+export type ChatWrapperGeneratedInitiallyEngagedFunctionsContextState = {
     contextText: LlamaText,
     stopGenerationTriggers: LlamaText[],
     ignoreStartText?: LlamaText[],
     functionCall?: {
         initiallyEngaged: boolean,
         disengageInitiallyEngaged: LlamaText[]
-    }
+    },
+
+    detectFunctionCalls?: never,
+    prefixTriggers?: never,
+    noPrefixTrigger?: never,
+    rerender?: never
 };
 
 export type ChatWrapperGenerateInitialHistoryOptions = {
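
To make the shape above concrete, here is a minimal sketch of the kind of state a Harmony-style chat wrapper might return from `generateContextState()`. The trigger and inject values below are hypothetical stand-ins, loosely modeled on the `<|channel|>` tokens that appear in the Harmony test snapshots later in this series; they are not taken from the actual `HarmonyChatWrapper` implementation, and the sketch assumes the type exports added to `src/index.ts` in this patch:

    import {
        LlamaText, SpecialTokensText, type ChatWrapperGeneratedPrefixTriggersContextState
    } from "node-llama-cpp";

    // hypothetical values; a real wrapper would build these from its own settings
    const state: ChatWrapperGeneratedPrefixTriggersContextState = {
        contextText: LlamaText(new SpecialTokensText("<|start|>assistant")),
        stopGenerationTriggers: [LlamaText(new SpecialTokensText("<|return|>"))],
        prefixTriggers: [{
            // route the commentary channel into function calling mode
            type: "functionCall",
            triggers: [LlamaText(new SpecialTokensText("<|channel|>commentary to="))]
        }, {
            // route the analysis channel into a thought segment
            type: "segment",
            segmentType: "thought",
            triggers: [LlamaText(new SpecialTokensText("<|channel|>analysis<|message|>"))]
        }, {
            // route the final channel into a plain textual response
            type: "response",
            triggers: [LlamaText(new SpecialTokensText("<|channel|>final<|message|>"))]
        }],
        noPrefixTrigger: {
            // if no trigger matches, fall back to a regular textual response
            type: "response",
            inject: LlamaText(new SpecialTokensText("<|channel|>final<|message|>"))
        },
        rerender: {
            // when a message terminator is generated, rerender the template and
            // close the currently open segment or response item
            triggers: [LlamaText(new SpecialTokensText("<|end|>"))],
            action: "closeResponseItem"
        },
        // function calling is only reachable through the prefix triggers above
        detectFunctionCalls: false
    };

`LlamaChat` consumes this state in its token loop (see the `handlePrefixTriggers` logic earlier in this patch): the first generated tokens are matched against the prefix trigger detectors, and the matching trigger decides whether generation continues as a function call, an open segment, or a plain response.
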
@@ -169,8 +334,8 @@ export type ChatModelFunctionCall = {
 };
 
-export const allSegmentTypes = ["thought"] as const satisfies ChatModelSegmentType[];
-export type ChatModelSegmentType = "thought";
+export const allSegmentTypes = ["thought", "comment"] as const satisfies ChatModelSegmentType[];
+export type ChatModelSegmentType = "thought" | "comment";
 
 export type ChatModelSegment = {
     type: "segment",
     segmentType: ChatModelSegmentType,
diff --git a/src/utils/getChatWrapperSegmentDefinition.ts b/src/utils/getChatWrapperSegmentDefinition.ts
index 02f8a7e6..4d699835 100644
--- a/src/utils/getChatWrapperSegmentDefinition.ts
+++ b/src/utils/getChatWrapperSegmentDefinition.ts
@@ -6,6 +6,8 @@ export function getChatWrapperSegmentDefinition(
 ): ChatWrapperSettingsSegment | undefined {
     if (segmentType === "thought")
         return chatWrapperSetting.segments?.thought;
+    else if (segmentType === "comment")
+        return chatWrapperSetting.segments?.comment;
 
     void (segmentType satisfies never);
     return undefined;
diff --git a/test/modelDependent/bgeReranker/rank.test.ts b/test/modelDependent/bgeReranker/rank.test.ts
index e0ffea64..7ebc731d 100644
--- a/test/modelDependent/bgeReranker/rank.test.ts
+++ b/test/modelDependent/bgeReranker/rank.test.ts
@@ -43,7 +43,7 @@ describe("bgeReranker", () => {
         const highestRankDocument = documents[highestRankIndex];
         expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world");
 
-        expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot(`0.014774031693273055`);
+        
expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.014774031693273055"); expect(simplifyRanks(ranks)).toMatchInlineSnapshot(` [ 0.00002039908727992137, @@ -97,7 +97,7 @@ describe("bgeReranker", () => { const highestRankDocument = documents[highestRankIndex]; expect(highestRankDocument).to.eql("Mount Everest is the tallest mountain in the world"); - expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot(`0.014774031693273055`); + expect(simplifyRanks([highestRank])[0]).toMatchInlineSnapshot("0.014774031693273055"); expect(simplifyRanks(ranks)).toMatchInlineSnapshot(` [ 0.00002039908727992137, diff --git a/test/standalone/chatWrappers/HarmonyChatWrapper.test.ts b/test/standalone/chatWrappers/HarmonyChatWrapper.test.ts new file mode 100644 index 00000000..92fdc1a9 --- /dev/null +++ b/test/standalone/chatWrappers/HarmonyChatWrapper.test.ts @@ -0,0 +1,411 @@ +import {describe, expect, test} from "vitest"; +import {ChatHistoryItem, defineChatSessionFunction, HarmonyChatWrapper} from "../../../src/index.js"; +import {defaultChatSystemPrompt} from "../../../src/config.js"; + + +describe("HarmonyChatWrapper", () => { + const todayDate = new Date("2025-08-05T00:00:00Z"); + + const conversationHistory: ChatHistoryItem[] = [{ + type: "system", + text: defaultChatSystemPrompt + }, { + type: "user", + text: "Hi there!" + }, { + type: "model", + response: [ + { + type: "segment", + segmentType: "thought", + text: "Let me think how to respond to this.", + ended: true + }, + "Hello!" + ] + }, { + type: "user", + text: "How are you?" + }, { + type: "model", + response: [ + { + type: "segment", + segmentType: "thought", + text: "Let me think how to answer", + ended: true + }, + { + type: "segment", + segmentType: "comment", + text: "This is a question about my state", + ended: true + }, + "I'm good, how are you?" 
+ ] + }]; + + const functions = { + getRandomNumber: defineChatSessionFunction({ + description: "Get a random number", + params: { + type: "object", + properties: { + min: { + type: "number" + }, + max: { + type: "number" + } + } + }, + async handler(params) { + return Math.floor(Math.random() * (params.max - params.min + 1) + params.min); + } + }), + notifyOwner: defineChatSessionFunction({ + description: "Send a notification to the owner, and create sub notifications", + params: { + $ref: "#/$defs/notification", + $defs: { + notification: { + type: "object", + properties: { + message: { + type: "string" + }, + subNotifications: { + type: "array", + items: { + $ref: "#/$defs/notification" + } + } + } + } + } + }, + handler(notification) { + return "Notification created: " + notification.message; + } + }), + notifyOwner2: defineChatSessionFunction({ + description: "Send a notification to the owner, and create sub notifications", + params: { + $ref: "#/$defs/notification", + $defs: { + notification: { + type: "object", + properties: { + message: { + type: "string", + description: "Notification message" + }, + subNotifications: { + type: "array", + description: "Sub notifications", + items: { + $ref: "#/$defs/notification" + } + } + } + } + } + }, + handler(notification) { + return "Notification created: " + notification.message; + } + }), + func1: defineChatSessionFunction({ + description: "Some function", + params: { + type: "object", + properties: { + message: { + type: "string", + description: "Some message", + minLength: 3, + maxLength: 10 + }, + words: { + type: "array", + description: "Some words", + items: { + type: "string" + }, + minItems: 2, + maxItems: 5 + }, + headers: { + type: "object", + description: "Some headers", + additionalProperties: { + type: "string" + }, + minProperties: 4, + maxProperties: 12 + }, + mappings: { + type: "object", + description: "Some mappings", + properties: { + a: { + type: "boolean" + }, + b: { + type: "number" + }, + c: { + type: ["string", "null"] + } + }, + additionalProperties: { + type: "string" + }, + minProperties: 4, + maxProperties: 12 + } + } + }, + handler(params) { + + } + }) + }; + const conversationHistory2: ChatHistoryItem[] = [{ + type: "system", + text: defaultChatSystemPrompt + }, { + type: "user", + text: "Hi there!" + }, { + type: "model", + response: ["Hello!"] + }, { + type: "user", + text: "Role a dice twice and tell me the total result" + }, { + type: "model", + response: [ + { + type: "functionCall", + name: "getRandomNumber", + description: "Get a random number", + params: { + min: 1, + max: 6 + }, + result: 3 + }, + { + type: "functionCall", + name: "getRandomNumber", + description: "Get a random number", + params: { + min: 1, + max: 6 + }, + result: 4 + }, + "The total result of rolling the dice twice is 3 + 4 = 7." + ] + }]; + + test("should generate valid context text", () => { + const chatWrapper = new HarmonyChatWrapper({todayDate, keepOnlyLastThought: false}); + const {contextText} = chatWrapper.generateContextState({chatHistory: conversationHistory}); + + expect(contextText).toMatchInlineSnapshot(` + LlamaText([ + new SpecialTokensText("<|start|>system<|message|>"), + "You are ChatGPT, a large language model trained by OpenAI. + Knowledge cutoff: 2024-06 + Current date: 2025-08-05 + + Reasoning: medium + + # Valid channels: analysis, commentary, final. 
Channel must be included for every message.", + new SpecialTokensText("<|end|><|start|>developer<|message|>"), + "# Instruction + + You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", + new SpecialTokensText("<|end|><|start|>user<|message|>"), + "Hi there!", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>analysis<|message|>"), + "Let me think how to respond to this.", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>final<|message|>"), + "Hello!", + new SpecialTokensText("<|end|><|start|>user<|message|>"), + "How are you?", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>analysis<|message|>"), + "Let me think how to answer", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>commentary<|message|>"), + "This is a question about my state", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>final<|message|>"), + "I'm good, how are you?", + ]) + `); + + const chatWrapper2 = new HarmonyChatWrapper({todayDate}); + const {contextText: contextText2} = chatWrapper2.generateContextState({ + chatHistory: conversationHistory2, + availableFunctions: functions + }); + + expect(contextText2).toMatchInlineSnapshot(` + LlamaText([ + new SpecialTokensText("<|start|>system<|message|>"), + "You are ChatGPT, a large language model trained by OpenAI. + Knowledge cutoff: 2024-06 + Current date: 2025-08-05 + + Reasoning: medium + + # Valid channels: analysis, commentary, final. Channel must be included for every message. + Calls to these tools must go to the commentary channel: 'functions'.", + new SpecialTokensText("<|end|><|start|>developer<|message|>"), + "# Instructions + + You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information. 
+ + # Tools + + ## functions + + namespace functions { + + // Get a random number + type getRandomNumber = (_: {min: number, max: number}) => any; + + // Send a notification to the owner, and create sub notifications + type notifyOwner = (_: /* Type: notification */ {message: string, subNotifications: (/* notification type */ any)[]}) => any; + + // Send a notification to the owner, and create sub notifications + type notifyOwner2 = (_: /* Type: notification */ { + // Notification message + message: string, + + // Sub notifications + subNotifications: (/* notification type */ any)[] + }) => any; + + // Some function + type func1 = (_: { + // Some message + // minimum length: 3, maximum length: 10 + message: string, + + // Some words + // maximum items: 5 + words: [string, string, ...string[]], + + // Some headers + // minimum number of properties: 4, maximum number of properties: 12 + headers: {[key: string]: string}, + + // Some mappings + // minimum number of properties: 4, maximum number of properties: 12 + mappings: {a: boolean, b: number, c: string | null} & {[key: string]: string} + }) => any; + + } // namespace functions", + new SpecialTokensText("<|end|><|start|>user<|message|>"), + "Hi there!", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>final<|message|>"), + "Hello!", + new SpecialTokensText("<|end|><|start|>user<|message|>"), + "Role a dice twice and tell me the total result", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>commentary to="), + "functions.getRandomNumber", + new SpecialTokensText("<|constrain|>json<|message|>"), + "{"min": 1, "max": 6}", + new SpecialTokensText("<|call|><|start|>"), + "functions.getRandomNumber", + new SpecialTokensText(" to=assistant<|channel|>commentary<|message|>"), + "3", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>commentary to="), + "functions.getRandomNumber", + new SpecialTokensText("<|constrain|>json<|message|>"), + "{"min": 1, "max": 6}", + new SpecialTokensText("<|call|><|start|>"), + "functions.getRandomNumber", + new SpecialTokensText(" to=assistant<|channel|>commentary<|message|>"), + "4", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>final<|message|>"), + "The total result of rolling the dice twice is 3 + 4 = 7.", + ]) + `); + + const chatWrapper3 = new HarmonyChatWrapper({todayDate}); + const {contextText: contextText3} = chatWrapper3.generateContextState({chatHistory: conversationHistory}); + const {contextText: contextText3WithOpenModelResponse} = chatWrapper3.generateContextState({ + chatHistory: [ + ...conversationHistory, + { + type: "model", + response: [] + } + ] + }); + + expect(contextText3).toMatchInlineSnapshot(` + LlamaText([ + new SpecialTokensText("<|start|>system<|message|>"), + "You are ChatGPT, a large language model trained by OpenAI. + Knowledge cutoff: 2024-06 + Current date: 2025-08-05 + + Reasoning: medium + + # Valid channels: analysis, commentary, final. Channel must be included for every message.", + new SpecialTokensText("<|end|><|start|>developer<|message|>"), + "# Instruction + + You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", + new SpecialTokensText("<|end|><|start|>user<|message|>"), + "Hi there!", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>final<|message|>"), + "Hello!", + new SpecialTokensText("<|end|><|start|>user<|message|>"), + "How are you?", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>analysis<|message|>"), + "Let me think how to answer", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>commentary<|message|>"), + "This is a question about my state", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>final<|message|>"), + "I'm good, how are you?", + ]) + `); + + expect(contextText3WithOpenModelResponse).toMatchInlineSnapshot(` + LlamaText([ + new SpecialTokensText("<|start|>system<|message|>"), + "You are ChatGPT, a large language model trained by OpenAI. + Knowledge cutoff: 2024-06 + Current date: 2025-08-05 + + Reasoning: medium + + # Valid channels: analysis, commentary, final. Channel must be included for every message.", + new SpecialTokensText("<|end|><|start|>developer<|message|>"), + "# Instruction + + You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. + If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", + new SpecialTokensText("<|end|><|start|>user<|message|>"), + "Hi there!", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>final<|message|>"), + "Hello!", + new SpecialTokensText("<|end|><|start|>user<|message|>"), + "How are you?", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>commentary<|message|>"), + "This is a question about my state", + new SpecialTokensText("<|end|><|start|>assistant<|channel|>final<|message|>"), + "I'm good, how are you?", + new SpecialTokensText("<|end|><|start|>assistant"), + ]) + `); + }); +}); diff --git a/test/standalone/chatWrappers/utils/jinjaTemplates.ts b/test/standalone/chatWrappers/utils/jinjaTemplates.ts new file mode 100644 index 00000000..a896d9b7 --- /dev/null +++ b/test/standalone/chatWrappers/utils/jinjaTemplates.ts @@ -0,0 +1,954 @@ +// source: https://huggingface.co/openai/gpt-oss-20b/blob/main/chat_template.jinja +export const harmonyJinjaTemplate = ` +{# + In addition to the normal inputs of \`messages\` and \`tools\`, this template also accepts the + following kwargs: + - "builtin_tools": A list, can contain "browser" and/or "python". + - "model_identity": A string that optionally describes the model identity. + - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium". 
+ #} +{# Tool Definition Rendering ============================================== #} +{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%} + {%- if param_spec.type == "array" -%} + {%- if param_spec["items"] -%} + {%- if param_spec["items"]["type"] == "string" -%} + {{- "string[]" -}} + {%- elif param_spec["items"]["type"] == "number" -%} + {{- "number[]" -}} + {%- elif param_spec["items"]["type"] == "integer" -%} + {{- "number[]" -}} + {%- elif param_spec["items"]["type"] == "boolean" -%} + {{- "boolean[]" -}} + {%- else -%} + {%- set inner_type = render_typescript_type(param_spec["items"], required_params) -%} + {%- if inner_type == "object | object" or inner_type | length > 50 -%} + {{- "any[]" -}} + {%- else -%} + {{- inner_type + "[]" -}} + {%- endif -%} + {%- endif -%} + {%- if param_spec.nullable -%} + {{- " | null" -}} + {%- endif -%} + {%- else -%} + {{- "any[]" -}} + {%- if param_spec.nullable -%} + {{- " | null" -}} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%} + {# Handle array of types like ["object", "object"] from Union[dict, list] #} + {%- if param_spec.type | length > 1 -%} + {{- param_spec.type | join(" | ") -}} + {%- else -%} + {{- param_spec.type[0] -}} + {%- endif -%} + {%- elif param_spec.oneOf -%} + {# Handle oneOf schemas - check for complex unions and fallback to any #} + {%- set has_object_variants = false -%} + {%- for variant in param_spec.oneOf -%} + {%- if variant.type == "object" -%} + {%- set has_object_variants = true -%} + {%- endif -%} + {%- endfor -%} + {%- if has_object_variants and param_spec.oneOf | length > 1 -%} + {{- "any" -}} + {%- else -%} + {%- for variant in param_spec.oneOf -%} + {{- render_typescript_type(variant, required_params) -}} + {%- if variant.description -%} + {{- "// " + variant.description -}} + {%- endif -%} + {%- if variant.default is defined -%} + {{- " " -}} + {{- "// default: " + variant.default | tojson -}} + {%- endif -%} + {%- if not loop.last -%} + {{- " | " -}} + {{- "\\n" -}} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + {%- elif param_spec.type == "string" -%} + {%- if param_spec.enum -%} + {{- "\\"" + param_spec.enum | join("\\" | \\"") + "\\"" -}} + {%- else -%} + {{- "string" -}} + {%- if param_spec.nullable -%} + {{- " | null" -}} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type == "number" -%} + {{- "number" -}} + {%- elif param_spec.type == "integer" -%} + {{- "number" -}} + {%- elif param_spec.type == "boolean" -%} + {{- "boolean" -}} + {%- elif param_spec.type == "object" -%} + {%- if param_spec.properties -%} + {{- "{\\n" -}} + {%- for (prop_name, prop_spec) in param_spec.properties.items() -%} + {{- prop_name -}} + {%- if prop_name not in (param_spec.required or []) -%} + {{- "?" 
-}} + {%- endif -%} + {{- ": " -}} + {{- "\\n " -}} + {{- render_typescript_type(prop_spec, (param_spec.required or [])) -}} + {%- if not loop.last -%} + {{- ", " -}} + {%- endif -%} + {%- endfor -%} + {{- "}" -}} + {%- else -%} + {{- "object" -}} + {%- endif -%} + {%- else -%} + {{- "any" -}} + {%- endif -%} +{%- endmacro -%} +{%- macro render_tool_namespace(namespace_name, tools) -%} + {{- "## " + namespace_name + "\\n\\n" -}} + {{- "namespace " + namespace_name + " {\\n\\n" -}} + {%- for tool in tools -%} + {%- set tool = tool.function -%} + {{- "// " + tool.description + "\\n" -}} + {{- "type " + tool.name + " = " -}} + {%- if tool.parameters and tool.parameters.properties -%} + {{- "(_: {\\n" -}} + {%- for (param_name, param_spec) in tool.parameters.properties.items() -%} + {%- if param_spec.description -%} + {{- "// " + param_spec.description + "\\n" -}} + {%- endif -%} + {{- param_name -}} + {%- if param_name not in (tool.parameters.required or []) -%} + {{- "?" -}} + {%- endif -%} + {{- ": " -}} + {{- render_typescript_type(param_spec, (tool.parameters.required or [])) -}} + {%- if param_spec.default is defined -%} + {%- if param_spec.enum -%} + {{- ", // default: " + param_spec.default -}} + {%- elif param_spec.oneOf -%} + {{- "// default: " + param_spec.default -}} + {%- else -%} + {{- ", // default: " + param_spec.default | tojson -}} + {%- endif -%} + {%- endif -%} + {%- if not loop.last -%} + {{- ",\\n" -}} + {%- else -%} + {{- ",\\n" -}} + {%- endif -%} + {%- endfor -%} + {{- "}) => any;\\n\\n" -}} + {%- else -%} + {{- "() => any;\\n\\n" -}} + {%- endif -%} + {%- endfor -%} + {{- "} // namespace " + namespace_name -}} +{%- endmacro -%} +{%- macro render_builtin_tools(browser_tool, python_tool) -%} + {%- if browser_tool -%} + {{- "## browser\\n\\n" -}} + {{- "// Tool for browsing.\\n" -}} + {{- "// The \`cursor\` appears in brackets before each browsing display: \`[{cursor}]\`.\\n" -}} + {{- "// Cite information from the tool using the following format:\\n" -}} + {{- "// \`【{cursor}†L{line_start}(-L{line_end})?】\`, for example: \`【6†L9-L11】\` or \`【8†L3】\`.\\n" -}} + {{- "// Do not quote more than 10 words directly from the tool output.\\n" -}} + {{- "// sources=web (default: web)\\n" -}} + {{- "namespace browser {\\n\\n" -}} + {{- "// Searches for information related to \`query\` and displays \`topn\` results.\\n" -}} + {{- "type search = (_: {\\n" -}} + {{- "query: string,\\n" -}} + {{- "topn?: number, // default: 10\\n" -}} + {{- "source?: string,\\n" -}} + {{- "}) => any;\\n\\n" -}} + {{- "// Opens the link \`id\` from the page indicated by \`cursor\` starting at line number \`loc\`, showing \`num_lines\` lines.\\n" -}} + {{- "// Valid link ids are displayed with the formatting: \`【{id}†.*】\`.\\n" -}} + {{- "// If \`cursor\` is not provided, the most recent page is implied.\\n" -}} + {{- "// If \`id\` is a string, it is treated as a fully qualified URL associated with \`source\`.\\n" -}} + {{- "// If \`loc\` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.\\n" -}} + {{- "// Use this function without \`id\` to scroll to a new location of an opened page.\\n" -}} + {{- "type open = (_: {\\n" -}} + {{- "id?: number | string, // default: -1\\n" -}} + {{- "cursor?: number, // default: -1\\n" -}} + {{- "loc?: number, // default: -1\\n" -}} + {{- "num_lines?: number, // default: -1\\n" -}} + {{- "view_source?: boolean, // default: false\\n" -}} + {{- "source?: string,\\n" -}} + {{- "}) => 
any;\\n\\n" -}} + {{- "// Finds exact matches of \`pattern\` in the current page, or the page given by \`cursor\`.\\n" -}} + {{- "type find = (_: {\\n" -}} + {{- "pattern: string,\\n" -}} + {{- "cursor?: number, // default: -1\\n" -}} + {{- "}) => any;\\n\\n" -}} + {{- "} // namespace browser\\n\\n" -}} + {%- endif -%} + {%- if python_tool -%} + {{- "## python\\n\\n" -}} + {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).\\n\\n" -}} + {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\\n\\n" -}} + {%- endif -%} +{%- endmacro -%} +{# System Message Construction ============================================ #} +{%- macro build_system_message() -%} + {%- if model_identity is not defined -%} + {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI." -%} + {%- endif -%} + {{- model_identity + "\\n" -}} + {{- "Knowledge cutoff: 2024-06\\n" -}} + {{- "Current date: " + strftime_now("%Y-%m-%d") + "\\n\\n" -}} + {%- if reasoning_effort is not defined -%} + {%- set reasoning_effort = "medium" -%} + {%- endif -%} + {{- "Reasoning: " + reasoning_effort + "\\n\\n" -}} + {%- if builtin_tools -%} + {{- "# Tools\\n\\n" -}} + {%- set available_builtin_tools = namespace(browser=false, python=false) -%} + {%- for tool in builtin_tools -%} + {%- if tool == "browser" -%} + {%- set available_builtin_tools.browser = true -%} + {%- elif tool == "python" -%} + {%- set available_builtin_tools.python = true -%} + {%- endif -%} + {%- endfor -%} + {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) -}} + {%- endif -%} + {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." -}} + {%- if tools -%} + {{- "\\nCalls to these tools must go to the commentary channel: 'functions'." 
-}} + {%- endif -%} +{%- endmacro -%} +{# Main Template Logic ================================================= #} +{# Set defaults #} +{# Render system message #} +{{- "<|start|>system<|message|>" -}} +{{- build_system_message() -}} +{{- "<|end|>" -}} +{# Extract developer message #} +{%- if messages[0].role == "developer" or messages[0].role == "system" -%} + {%- set developer_message = messages[0].content -%} + {%- set loop_messages = messages[1:] -%} +{%- else -%} + {%- set developer_message = "" -%} + {%- set loop_messages = messages -%} +{%- endif -%} +{# Render developer message #} +{%- if developer_message or tools -%} + {{- "<|start|>developer<|message|>" -}} + {%- if developer_message -%} + {{- "# Instructions\\n\\n" -}} + {{- developer_message -}} + {%- endif -%} + {%- if tools -%} + {{- "\\n\\n" -}} + {{- "# Tools\\n\\n" -}} + {{- render_tool_namespace("functions", tools) -}} + {%- endif -%} + {{- "<|end|>" -}} +{%- endif -%} +{# Render messages #} +{%- set last_tool_call = namespace(name=none) -%} +{%- for message in loop_messages -%} + {# At this point only assistant/user/tool messages should remain #} + {%- if message.role == "assistant" -%} + {# Checks to ensure the messages are being passed in the format we expect #} + {%- if "content" in message -%} + {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content -%} + {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") -}} + {%- endif -%} + {%- endif -%} + {%- if "thinking" in message -%} + {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking -%} + {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") -}} + {%- endif -%} + {%- endif -%} + {%- if "tool_calls" in message -%} + {# We need very careful handling here - we want to drop the tool call analysis message if the model #} + {# has output a later <|final|> message, but otherwise we want to retain it. This is the only case #} + {# when we render CoT/analysis messages in inference. #} + {%- set future_final_message = namespace(found=false) -%} + {%- for future_message in loop_messages[loop.index:] -%} + {%- if future_message.role == "assistant" and "tool_calls" not in future_message -%} + {%- set future_final_message.found = true -%} + {%- endif -%} + {%- endfor -%} + {# We assume max 1 tool call per message, and so we infer the tool call name #} + {# in "tool" messages from the most recent assistant tool call name #} + {%- set tool_call = message.tool_calls[0] -%} + {%- if tool_call.function -%} + {%- set tool_call = tool_call.function -%} + {%- endif -%} + {%- if message.content and message.thinking -%} + {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! 
Put the analysis message in one or the other, but not both.") -}} + {%- elif message.content and not future_final_message.found -%} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" -}} + {%- elif message.thinking and not future_final_message.found -%} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" -}} + {%- endif -%} + {{- "<|start|>assistant to=" -}} + {{- "functions." + tool_call.name + "<|channel|>commentary " -}} + {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" -}} + {{- tool_call.arguments | tojson -}} + {{- "<|call|>" -}} + {%- set last_tool_call.name = tool_call.name -%} + {%- elif loop.last and not add_generation_prompt -%} + {# Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #} + {# This is a situation that should only occur in training, never in inference. #} + {%- if "thinking" in message -%} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" -}} + {%- endif -%} + {# <|return|> indicates the end of generation, but <|end|> does not #} + {# <|return|> should never be an input to the model, but we include it as the final token #} + {# when training, so the model learns to emit it. #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" -}} + {%- else -%} + {# CoT is dropped during all previous turns, so we never render it for inference #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" -}} + {%- set last_tool_call.name = none -%} + {%- endif -%} + {%- elif message.role == "tool" -%} + {%- if last_tool_call.name is none -%} + {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") -}} + {%- endif -%} + {{- "<|start|>functions." + last_tool_call.name -}} + {{- " to=assistant<|channel|>commentary<|message|>" + message.content | tojson + "<|end|>" -}} + {%- elif message.role == "user" -%} + {{- "<|start|>user<|message|>" + message.content + "<|end|>" -}} + {%- endif -%} +{%- endfor -%} +{# Generation prompt #} +{%- if add_generation_prompt -%} + {{- "<|start|>assistant" -}} +{%- endif -%} +`.slice(1, -1); + +export const harmonyJinjaTemplate2 = ` +{# Copyright 2025-present Unsloth. Apache 2.0 License. Unsloth chat template fixes. Edited from ggml-org & OpenAI #} +{#- + In addition to the normal inputs of \`messages\` and \`tools\`, this template also accepts the + following kwargs: + - "builtin_tools": A list, can contain "browser" and/or "python". + - "model_identity": A string that optionally describes the model identity. + - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium". 
+ #} + +{#- Tool Definition Rendering ============================================== #} +{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%} + {%- if param_spec.type == "array" -%} + {%- if param_spec['items'] -%} + {%- if param_spec['items']['type'] == "string" -%} + {{- "string[]" }} + {%- elif param_spec['items']['type'] == "number" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "integer" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "boolean" -%} + {{- "boolean[]" }} + {%- else -%} + {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%} + {%- if inner_type == "object | object" or inner_type|length > 50 -%} + {{- "any[]" }} + {%- else -%} + {{- inner_type + "[]" }} + {%- endif -%} + {%- endif -%} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- else -%} + {{- "any[]" }} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%} + {#- Handle array of types like ["object", "object"] from Union[dict, list] #} + {%- if param_spec.type | length > 1 -%} + {{- param_spec.type | join(" | ") }} + {%- else -%} + {{- param_spec.type[0] }} + {%- endif -%} + {%- elif param_spec.oneOf -%} + {#- Handle oneOf schemas - check for complex unions and fallback to any #} + {%- set has_object_variants = false -%} + {%- for variant in param_spec.oneOf -%} + {%- if variant.type == "object" -%} + {%- set has_object_variants = true -%} + {%- endif -%} + {%- endfor -%} + {%- if has_object_variants and param_spec.oneOf|length > 1 -%} + {{- "any" }} + {%- else -%} + {%- for variant in param_spec.oneOf -%} + {{- render_typescript_type(variant, required_params) -}} + {%- if variant.description %} + {{- "// " + variant.description }} + {%- endif -%} + {%- if variant.default is defined %} + {{ "// default: " + variant.default|tojson }} + {%- endif -%} + {%- if not loop.last %} + {{- " | " }} + {% endif -%} + {%- endfor -%} + {%- endif -%} + {%- elif param_spec.type == "string" -%} + {%- if param_spec.enum -%} + {{- '"' + param_spec.enum|join('" | "') + '"' -}} + {%- else -%} + {{- "string" }} + {%- if param_spec.nullable %} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type == "number" -%} + {{- "number" }} + {%- elif param_spec.type == "integer" -%} + {{- "number" }} + {%- elif param_spec.type == "boolean" -%} + {{- "boolean" }} + + {%- elif param_spec.type == "object" -%} + {%- if param_spec.properties -%} + {{- "{\\n" }} + {%- for prop_name, prop_spec in param_spec.properties.items() -%} + {{- prop_name -}} + {%- if prop_name not in (param_spec.required or []) -%} + {{- "?" 
}} + {%- endif -%} + {{- ": " }} + {{ render_typescript_type(prop_spec, param_spec.required or []) }} + {%- if not loop.last -%} + {{-", " }} + {%- endif -%} + {%- endfor -%} + {{- "}" }} + {%- else -%} + {{- "object" }} + {%- endif -%} + {%- else -%} + {{- "any" }} + {%- endif -%} +{%- endmacro -%} + +{%- macro render_tool_namespace(namespace_name, tools) -%} + {{- "## " + namespace_name + "\\n\\n" }} + {{- "namespace " + namespace_name + " {\\n\\n" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- "// " + tool.description + "\\n" }} + {{- "type "+ tool.name + " = " }} + {%- if tool.parameters and tool.parameters.properties -%} + {{- "(_: " }} + {{- "{\\n" }} + {%- for param_name, param_spec in tool.parameters.properties.items() %} + {{- "// " + param_spec.description + "\\n" }} + {{- param_name }} + {%- if param_name not in (tool.parameters.required or []) -%} + {{- "?" }} + {%- endif -%} + {{- ": " }} + {{- render_typescript_type(param_spec, tool.parameters.required or []) }} + {%- if param_spec.default is defined -%} + {%- if param_spec.enum %} + {{- ", // default: " + param_spec.default }} + {%- elif param_spec.oneOf %} + {{- "// default: " + param_spec.default }} + {%- else %} + {{- ", // default: " + param_spec.default|tojson }} + {%- endif -%} + {%- endif -%} + {%- if not loop.last %} + {{- ",\\n" }} + {%- else %} + {{- "\\n" }} + {%- endif -%} + {%- endfor %} + {{- "}) => any;\\n\\n" }} + {%- else -%} + {{- "() => any;\\n\\n" }} + {%- endif -%} + {%- endfor %} + {{- "} // namespace " + namespace_name }} +{%- endmacro -%} + +{%- macro render_builtin_tools(browser_tool, python_tool) -%} + {%- if browser_tool %} + {{- "## browser\\n\\n" }} + {{- "// Tool for browsing.\\n" }} + {{- "// The \`cursor\` appears in brackets before each browsing display: \`[{cursor}]\`.\\n" }} + {{- "// Cite information from the tool using the following format:\\n" }} + {{- "// \`【{cursor}†L{line_start}(-L{line_end})?】\`, for example: \`【6†L9-L11】\` or \`【8†L3】\`.\\n" }} + {{- "// Do not quote more than 10 words directly from the tool output.\\n" }} + {{- "// sources=web (default: web)\\n" }} + {{- "namespace browser {\\n\\n" }} + {{- "// Searches for information related to \`query\` and displays \`topn\` results.\\n" }} + {{- "type search = (_: {\\n" }} + {{- "query: string,\\n" }} + {{- "topn?: number, // default: 10\\n" }} + {{- "source?: string,\\n" }} + {{- "}) => any;\\n\\n" }} + {{- "// Opens the link \`id\` from the page indicated by \`cursor\` starting at line number \`loc\`, showing \`num_lines\` lines.\\n" }} + {{- "// Valid link ids are displayed with the formatting: \`【{id}†.*】\`.\\n" }} + {{- "// If \`cursor\` is not provided, the most recent page is implied.\\n" }} + {{- "// If \`id\` is a string, it is treated as a fully qualified URL associated with \`source\`.\\n" }} + {{- "// If \`loc\` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.\\n" }} + {{- "// Use this function without \`id\` to scroll to a new location of an opened page.\\n" }} + {{- "type open = (_: {\\n" }} + {{- "id?: number | string, // default: -1\\n" }} + {{- "cursor?: number, // default: -1\\n" }} + {{- "loc?: number, // default: -1\\n" }} + {{- "num_lines?: number, // default: -1\\n" }} + {{- "view_source?: boolean, // default: false\\n" }} + {{- "source?: string,\\n" }} + {{- "}) => any;\\n\\n" }} + {{- "// Finds exact matches of \`pattern\` in the current page, or the page given by \`cursor\`.\\n" }} + {{- "type 
find = (_: {\\n" }} + {{- "pattern: string,\\n" }} + {{- "cursor?: number, // default: -1\\n" }} + {{- "}) => any;\\n\\n" }} + {{- "} // namespace browser\\n\\n" }} + {%- endif -%} + + {%- if python_tool %} + {{- "## python\\n\\n" }} + {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).\\n\\n" }} + {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\\n\\n" }} + {%- endif -%} +{%- endmacro -%} + +{#- System Message Construction ============================================ #} +{%- macro build_system_message() -%} + {%- if model_identity is not defined %} + {{- "You are ChatGPT, a large language model trained by OpenAI." -}} + {%- else %} + {{- model_identity }} + {%- endif %} + {{- "\\nKnowledge cutoff: 2024-06\\n" }} + {{- "Current date: " + strftime_now("%Y-%m-%d") + "\\n\\n" }} + {%- if reasoning_effort is not defined %} + {%- set reasoning_effort = "medium" %} + {%- endif %} + {{- "Reasoning: " + reasoning_effort + "\\n\\n" }} + {%- if builtin_tools is defined and builtin_tools is none %} + {{- "# Tools\\n\\n" }} + {%- set available_builtin_tools = namespace(browser=false, python=false) %} + {%- for tool in builtin_tools %} + {%- if tool == "browser" %} + {%- set available_builtin_tools.browser = true %} + {%- elif tool == "python" %} + {%- set available_builtin_tools.python = true %} + {%- endif %} + {%- endfor %} + {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }} + {%- endif -%} + {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }} + {%- if tools is defined and tools is not none -%} + {{- "\\nCalls to these tools must go to the commentary channel: 'functions'." 
}} + {%- endif -%} +{%- endmacro -%} + +{#- Main Template Logic ================================================= #} +{#- Set defaults #} + +{#- Render system message #} +{{- "<|start|>system<|message|>" }} +{{- build_system_message() }} +{{- "<|end|>" }} + +{#- Extract developer message #} +{%- if messages[0].role == "developer" or messages[0].role == "system" %} + {%- set developer_message = messages[0].content %} + {%- set loop_messages = messages[1:] %} +{%- else %} + {%- set developer_message = "" %} + {%- set loop_messages = messages %} +{%- endif %} + +{#- Render developer message #} +{%- if developer_message or tools %} + {{- "<|start|>developer<|message|>" }} + {%- if developer_message %} + {{- "# Instructions\\n\\n" }} + {{- developer_message }} + {%- endif %} + {%- if tools -%} + {{- "\\n\\n" }} + {{- "# Tools\\n\\n" }} + {{- render_tool_namespace("functions", tools) }} + {%- endif -%} + {{- "<|end|>" }} +{%- endif %} + +{#- Render messages #} +{%- set last_tool_call = namespace(name=none) %} +{%- for message in loop_messages -%} + {#- At this point only assistant/user/tool messages should remain #} + {%- if message.role == 'assistant' -%} + {%- if "tool_calls" in message %} + {#- We assume max 1 tool call per message, and so we infer the tool call name #} + {#- in "tool" messages from the most recent assistant tool call name #} + {%- set tool_call = message.tool_calls[0] %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if message.content %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }} + {%- endif %} + {{- "<|start|>assistant to=" }} + {{- "functions." + tool_call.name + "<|channel|>commentary json<|message|>" }} + {{- tool_call.arguments|tojson }} + {{- "<|call|>" }} + {%- set last_tool_call.name = tool_call.name %} + {%- elif "thinking" in message and loop.last and not add_generation_prompt %} + {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #} + {#- This is a situation that should only occur in training, never in inference. #} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {#- <|return|> indicates the end of generation, but <|end|> does not #} + {#- <|return|> should never be an input to the model, but we include it as the final token #} + {#- when training, so the model learns to emit it. #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }} + {%- set last_tool_call.name = none %} + {%- elif "thinking" in message %} + {#- CoT is dropped during all previous turns, so we never render it for inference #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }} + {%- set last_tool_call.name = none %} + {%- elif loop.last and not add_generation_prompt %} + {#- <|return|> indicates the end of generation, but <|end|> does not #} + {#- <|return|> should never be an input to the model, but we include it as the final token #} + {#- when training, so the model learns to emit it. #} + {{- "<|start|>assistant<|message|>" + message.content + "<|return|>" }} + {%- else %} + {{- "<|start|>assistant<|message|>" + message.content + "<|end|>" }} + {%- set last_tool_call.name = none %} + {%- endif %} + {%- elif message.role == 'tool' -%} + {%- if last_tool_call.name is none %} + {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }} + {%- endif %} + {{- "<|start|>functions." 
+ last_tool_call.name }} + {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }} + {%- else -%} + {{- "<|start|>user<|message|>" + message.content + "<|end|>" }} + {%- endif -%} +{%- endfor -%} + +{#- Generation prompt #} +{%- if add_generation_prompt -%} +<|start|>assistant +{%- endif -%} +{# Copyright 2025-present Unsloth. Apache 2.0 License. Unsloth chat template fixes. Edited from ggml-org & OpenAI #} +`.slice(1, -1); + + +export const harmonyJinjaTemplate3 = ` +{#- + In addition to the normal inputs of \`messages\` and \`tools\`, this template also accepts the + following kwargs: + - "builtin_tools": A list, can contain "browser" and/or "python". + - "model_identity": A string that optionally describes the model identity. + - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium". + #} + +{#- Tool Definition Rendering ============================================== #} +{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%} + {%- if param_spec.type == "array" -%} + {%- if param_spec['items'] -%} + {%- if param_spec['items']['type'] == "string" -%} + {{- "string[]" }} + {%- elif param_spec['items']['type'] == "number" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "integer" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "boolean" -%} + {{- "boolean[]" }} + {%- else -%} + {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%} + {%- if inner_type == "object | object" or inner_type|length > 50 -%} + {{- "any[]" }} + {%- else -%} + {{- inner_type + "[]" }} + {%- endif -%} + {%- endif -%} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- else -%} + {{- "any[]" }} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%} + {#- Handle array of types like ["object", "object"] from Union[dict, list] #} + {%- if param_spec.type | length > 1 -%} + {{- param_spec.type | join(" | ") }} + {%- else -%} + {{- param_spec.type[0] }} + {%- endif -%} + {%- elif param_spec.oneOf -%} + {#- Handle oneOf schemas - check for complex unions and fallback to any #} + {%- set has_object_variants = false -%} + {%- for variant in param_spec.oneOf -%} + {%- if variant.type == "object" -%} + {%- set has_object_variants = true -%} + {%- endif -%} + {%- endfor -%} + {%- if has_object_variants and param_spec.oneOf|length > 1 -%} + {{- "any" }} + {%- else -%} + {%- for variant in param_spec.oneOf -%} + {{- render_typescript_type(variant, required_params) -}} + {%- if variant.description %} + {{- "// " + variant.description }} + {%- endif -%} + {%- if variant.default is defined %} + {{ "// default: " + variant.default|tojson }} + {%- endif -%} + {%- if not loop.last %} + {{- " | " }} + {% endif -%} + {%- endfor -%} + {%- endif -%} + {%- elif param_spec.type == "string" -%} + {%- if param_spec.enum -%} + {{- '"' + param_spec.enum|join('" | "') + '"' -}} + {%- else -%} + {{- "string" }} + {%- if param_spec.nullable %} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type == "number" -%} + {{- "number" }} + {%- elif param_spec.type == "integer" -%} + {{- "number" }} + {%- elif param_spec.type == "boolean" -%} + {{- "boolean" }} + + {%- elif param_spec.type == "object" -%} + {%- if param_spec.properties -%} + {{- 
"{\\n" }} + {%- for prop_name, prop_spec in param_spec.properties.items() -%} + {{- prop_name -}} + {%- if prop_name not in (param_spec.required or []) -%} + {{- "?" }} + {%- endif -%} + {{- ": " }} + {{ render_typescript_type(prop_spec, param_spec.required or []) }} + {%- if not loop.last -%} + {{-", " }} + {%- endif -%} + {%- endfor -%} + {{- "}" }} + {%- else -%} + {{- "object" }} + {%- endif -%} + {%- else -%} + {{- "any" }} + {%- endif -%} +{%- endmacro -%} + +{%- macro render_tool_namespace(namespace_name, tools) -%} + {{- "## " + namespace_name + "\\n\\n" }} + {{- "namespace " + namespace_name + " {\\n\\n" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- "// " + tool.description + "\\n" }} + {{- "type "+ tool.name + " = (" }} + {%- if tool.parameters and tool.parameters.properties -%} + {{- "_: " }} + {{- "{\\n" }} + {%- for param_name, param_spec in tool.parameters.properties.items() %} + {{- "// " + param_spec.description + "\\n" }} + {{- param_name }} + {%- if param_name not in (tool.parameters.required or []) -%} + {{- "?" }} + {%- endif -%} + {{- ": " }} + {{- render_typescript_type(param_spec, tool.parameters.required or []) }} + {%- if param_spec.default is defined -%} + {%- if param_spec.oneOf %} + {{- "// default: " + param_spec.default }} + {%- else %} + {{- ", // default: " + param_spec.default|tojson }} + {%- endif -%} + {%- endif -%} + {%- if not loop.last %} + {{- ",\\n" }} + {%- endif -%} + {%- endfor %} + {{- ",\\n}) => any;\\n" }} + {%- else -%} + {{- "\\n}) => any;\\n" }} + {%- endif -%} + {%- endfor %} + {{- "\\n} // namespace " + namespace_name }} +{%- endmacro -%} + +{%- macro render_builtin_tools(browser_tool, python_tool) -%} + {%- if browser_tool %} + {{- "## browser\\n\\n" }} + {{- "// Tool for browsing.\\n" }} + {{- "// The \`cursor\` appears in brackets before each browsing display: \`[{cursor}]\`.\\n" }} + {{- "// Cite information from the tool using the following format:\\n" }} + {{- "// \`【{cursor}†L{line_start}(-L{line_end})?】\`, for example: \`【6†L9-L11】\` or \`【8†L3】\`.\\n" }} + {{- "// Do not quote more than 10 words directly from the tool output.\\n" }} + {{- "// sources=web (default: web)\\n" }} + {{- "namespace browser {\\n\\n" }} + {{- "// Searches for information related to \`query\` and displays \`topn\` results.\\n" }} + {{- "type search = (_: {\\n" }} + {{- "query: string,\\n" }} + {{- "topn?: number, // default: 10\\n" }} + {{- "source?: string,\\n" }} + {{- "}) => any;\\n\\n" }} + {{- "// Opens the link \`id\` from the page indicated by \`cursor\` starting at line number \`loc\`, showing \`num_lines\` lines.\\n" }} + {{- "// Valid link ids are displayed with the formatting: \`【{id}†.*】\`.\\n" }} + {{- "// If \`cursor\` is not provided, the most recent page is implied.\\n" }} + {{- "// If \`id\` is a string, it is treated as a fully qualified URL associated with \`source\`.\\n" }} + {{- "// If \`loc\` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.\\n" }} + {{- "// Use this function without \`id\` to scroll to a new location of an opened page.\\n" }} + {{- "type open = (_: {\\n" }} + {{- "id?: number | string, // default: -1\\n" }} + {{- "cursor?: number, // default: -1\\n" }} + {{- "loc?: number, // default: -1\\n" }} + {{- "num_lines?: number, // default: -1\\n" }} + {{- "view_source?: boolean, // default: false\\n" }} + {{- "source?: string,\\n" }} + {{- "}) => any;\\n\\n" }} + {{- "// Finds exact matches of \`pattern\` in the 
current page, or the page given by \`cursor\`.\\n" }} + {{- "type find = (_: {\\n" }} + {{- "pattern: string,\\n" }} + {{- "cursor?: number, // default: -1\\n" }} + {{- "}) => any;\\n\\n" }} + {{- "} // namespace browser\\n\\n" }} + {%- endif -%} + + {%- if python_tool %} + {{- "## python\\n\\n" }} + {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).\\n\\n" }} + {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\\n\\n" }} + {%- endif -%} +{%- endmacro -%} + +{#- System Message Construction ============================================ #} +{%- macro build_system_message() -%} + {%- if model_identity is not defined %} + {{- "You are ChatGPT, a large language model trained by OpenAI.\\n" -}} + {%- else %} + {{- model_identity }} + {%- endif %} + {{- "Knowledge cutoff: 2024-06\\n" }} + {{- "Current date: " + strftime_now("%Y-%m-%d") + "\\n\\n" }} + {%- if reasoning_effort is not defined %} + {%- set reasoning_effort = "medium" %} + {%- endif %} + {{- "reasoning: " + reasoning_effort + "\\n\\n" }} + {%- if builtin_tools %} + {{- "# Tools\\n\\n" }} + {%- set available_builtin_tools = namespace(browser=false, python=false) %} + {%- for tool in builtin_tools %} + {%- if tool == "browser" %} + {%- set available_builtin_tools.browser = true %} + {%- elif tool == "python" %} + {%- set available_builtin_tools.python = true %} + {%- endif %} + {%- endfor %} + {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }} + {%- endif -%} + {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message.\\n" }} + {{- "Calls to these tools must go to the commentary channel: 'functions'." 
}} +{%- endmacro -%} + +{#- Main Template Logic ================================================= #} +{#- Set defaults #} + +{#- Render system message #} +{{- "<|start|>system<|message|>" }} +{{- build_system_message() }} +{{- "<|end|>" }} + +{#- Extract developer message #} +{%- if messages[0].role == "developer" or messages[0].role == "system" %} + {%- set developer_message = messages[0].content %} + {%- set loop_messages = messages[1:] %} +{%- else %} + {%- set developer_message = "" %} + {%- set loop_messages = messages %} +{%- endif %} + +{#- Render developer message #} +{%- if developer_message or tools %} + {{- "<|start|>developer<|message|>" }} + {%- if developer_message %} + {{- "# Instructions\\n\\n" }} + {{- developer_message }} + {%- endif %} + {%- if tools -%} + {{- "\\n\\n" }} + {{- "# Tools\\n\\n" }} + {{- render_tool_namespace("functions", tools) }} + {%- endif -%} + {{- "<|end|>" }} +{%- endif %} + +{#- Render messages #} +{%- set last_tool_call = namespace(name=none) %} +{%- for message in loop_messages -%} + {#- At this point only assistant/user/tool messages should remain #} + {%- if message.role == 'assistant' -%} + {%- if "tool_calls" in message %} + {#- We assume max 1 tool call per message, and so we infer the tool call name #} + {#- in "tool" messages from the most recent assistant tool call name #} + {%- set tool_call = message.tool_calls[0] %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if message.content %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }} + {%- endif %} + {{- "<|start|>assistant to=" }} + {{- "functions." + tool_call.name + "<|channel|>commentary json<|message|>" }} + {{- tool_call.arguments|tojson }} + {{- "<|end|>" }} + {%- set last_tool_call.name = tool_call.name %} + {%- elif "thinking" in message and loop.last and not add_generation_prompt %} + {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #} + {#- This is a situation that should only occur in training, never in inference. #} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {#- <|return|> indicates the end of generation, but <|end|> does not #} + {#- <|return|> should never be an input to the model, but we include it as the final token #} + {#- when training, so the model learns to emit it. #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }} + {%- set last_tool_call.name = none %} + {%- elif "thinking" in message %} + {#- CoT is dropped during all previous turns, so we never render it for inference #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }} + {%- set last_tool_call.name = none %} + {%- elif loop.last and not add_generation_prompt %} + {#- <|return|> indicates the end of generation, but <|end|> does not #} + {#- <|return|> should never be an input to the model, but we include it as the final token #} + {#- when training, so the model learns to emit it. #} + {{- "<|start|>assistant<|message|>" + message.content + "<|return|>" }} + {%- else %} + {{- "<|start|>assistant<|message|>" + message.content + "<|end|>" }} + {%- set last_tool_call.name = none %} + {%- endif %} + {%- elif message.role == 'tool' -%} + {%- if last_tool_call.name is none %} + {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }} + {%- endif %} + {{- "<|start|>functions." 
+ last_tool_call.name }} + {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }} + {%- else -%} + {{- "<|start|>user<|message|>" + message.content + "<|end|>" }} + {%- endif -%} +{%- endfor -%} + +{#- Generation prompt #} +{%- if add_generation_prompt -%} +<|start|>assistant +{%- endif -%} +`.slice(1, -1); diff --git a/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts b/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts index ddea8365..26de4640 100644 --- a/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts +++ b/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts @@ -1,8 +1,9 @@ import {describe, expect, test} from "vitest"; import { AlpacaChatWrapper, ChatMLChatWrapper, DeepSeekChatWrapper, FalconChatWrapper, FunctionaryChatWrapper, GemmaChatWrapper, - GeneralChatWrapper, Llama2ChatWrapper, Llama3_1ChatWrapper, MistralChatWrapper, QwenChatWrapper, resolveChatWrapper + GeneralChatWrapper, Llama2ChatWrapper, Llama3_1ChatWrapper, MistralChatWrapper, QwenChatWrapper, resolveChatWrapper, HarmonyChatWrapper } from "../../../../src/index.js"; +import {harmonyJinjaTemplate, harmonyJinjaTemplate2, harmonyJinjaTemplate3} from "./jinjaTemplates.js"; const alpacaJinjaTemplate = ` @@ -718,4 +719,40 @@ describe("resolveChatWrapper", () => { }); expect(chatWrapper).to.be.instanceof(QwenChatWrapper); }); + + test("should resolve to specialized HarmonyChatWrapper", {timeout: 1000 * 60 * 60 * 2}, async () => { + const chatWrapper = resolveChatWrapper({ + customWrapperSettings: { + jinjaTemplate: { + template: harmonyJinjaTemplate + } + }, + fallbackToOtherWrappersOnJinjaError: false + }); + expect(chatWrapper).to.be.instanceof(HarmonyChatWrapper); + }); + + test("should resolve to specialized HarmonyChatWrapper 2", {timeout: 1000 * 60 * 60 * 2}, async () => { + const chatWrapper = resolveChatWrapper({ + customWrapperSettings: { + jinjaTemplate: { + template: harmonyJinjaTemplate2 + } + }, + fallbackToOtherWrappersOnJinjaError: false + }); + expect(chatWrapper).to.be.instanceof(HarmonyChatWrapper); + }); + + test("should resolve to specialized HarmonyChatWrapper 3", {timeout: 1000 * 60 * 60 * 2}, async () => { + const chatWrapper = resolveChatWrapper({ + customWrapperSettings: { + jinjaTemplate: { + template: harmonyJinjaTemplate3 + } + }, + fallbackToOtherWrappersOnJinjaError: false + }); + expect(chatWrapper).to.be.instanceof(HarmonyChatWrapper); + }); }); From 968a64d0b7f5d2aaa12795916b22da9851568cbc Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 9 Aug 2025 05:30:46 +0300 Subject: [PATCH 05/10] fix: types --- src/gguf/types/GgufMetadataTypes.ts | 2 +- src/utils/gbnfJson/types.ts | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gguf/types/GgufMetadataTypes.ts b/src/gguf/types/GgufMetadataTypes.ts index 3058aa98..04b3c589 100644 --- a/src/gguf/types/GgufMetadataTypes.ts +++ b/src/gguf/types/GgufMetadataTypes.ts @@ -268,7 +268,7 @@ export type GgufMetadataTokenizer = { "pixtral" | "mpt" | "starcoder" | "gpt-2" | "phi-2" | "jina-es" | "jina-de" | "jina-v1-en" | "jina-v2-es" | "jina-v2-de" | "jina-v2-code" | "refact" | "command-r" | "qwen2" | "stablelm2" | "olmo" | "dbrx" | "smaug-bpe" | "poro-chat" | "chatglm-bpe" | "viking" | "jais" | "tekken" | "smollm" | "codeshell" | "bloom" | "gpt3-finnish" | "exaone" | "exaone4" | "chameleon" | - "minerva-7b" | "megrez" | "gpt-4o" | "superbpe" | "trillion" | "bailingmoe" | string, + "minerva-7b" | "megrez" | "gpt-4o" | "superbpe" | "trillion" | "bailingmoe" | 
"a.x-4.0" | "mellum" | string, readonly tokens: readonly string[], readonly token_type: GgufMetadataTokenizerTokenType[], readonly token_type_count?: number, diff --git a/src/utils/gbnfJson/types.ts b/src/utils/gbnfJson/types.ts index dd17147b..76900755 100644 --- a/src/utils/gbnfJson/types.ts +++ b/src/utils/gbnfJson/types.ts @@ -189,7 +189,9 @@ export type GbnfJsonRefSchema> = {}> /** * Converts a GBNF JSON schema to a TypeScript type */ -export type GbnfJsonSchemaToType = GbnfJsonSchemaToTSType; +export type GbnfJsonSchemaToType = 0 extends 1 & T // if T is `any`, return `any` + ? any + : GbnfJsonSchemaToTSType; export type GbnfJsonSchemaToTSType> = {}> = Readonly extends T From 2c428268450196558eb42b711b8055aa85f86206 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 9 Aug 2025 05:31:02 +0300 Subject: [PATCH 06/10] test: fix test --- test/modelDependent/llama3.1/tokenBias.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/modelDependent/llama3.1/tokenBias.test.ts b/test/modelDependent/llama3.1/tokenBias.test.ts index ed24c5a7..e718e6aa 100644 --- a/test/modelDependent/llama3.1/tokenBias.test.ts +++ b/test/modelDependent/llama3.1/tokenBias.test.ts @@ -25,7 +25,7 @@ describe("llama 3.1", () => { const text = model.detokenize([token]); if (text.toLowerCase().includes("hello")) - customBias.set(token, -0.9); + customBias.set(token, -0.99); else if (text.toLowerCase().includes("hi")) customBias.set(token, "never"); } From 354924b071ba3452351c72b521b48dc0582e5e30 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 9 Aug 2025 15:05:32 +0300 Subject: [PATCH 07/10] docs: gpt-oss announcement and model links --- docs/blog/v3.12-gpt-oss.md | 142 +++++++++++++++++++++++++++++++++++ package.json | 2 + src/cli/recommendedModels.ts | 24 ++++++ 3 files changed, 168 insertions(+) create mode 100644 docs/blog/v3.12-gpt-oss.md diff --git a/docs/blog/v3.12-gpt-oss.md b/docs/blog/v3.12-gpt-oss.md new file mode 100644 index 00000000..a84d9c0f --- /dev/null +++ b/docs/blog/v3.12-gpt-oss.md @@ -0,0 +1,142 @@ +--- +title: gpt-oss is here! +date: 2025-08-09T20:00:00Z +lastUpdated: false +author: + name: Gilad S. + github: giladgd +category: Release +description: Learn how to use gpt-oss to its full potential with node-llama-cpp +image: + url: https://github.com/user-attachments/assets/df5f1f59-a2cd-4fdb-b60c-3214f4a1584b + alt: "node-llama-cpp + gpt-oss" + width: 3072 + height: 1536 +--- +[`node-llama-cpp`](https://node-llama-cpp.withcat.ai) v3.12 is here, with full support for [`gpt-oss`](https://huggingface.co/openai/gpt-oss-20b) models! + +--- + +## gpt-oss +[`gpt-oss`](https://huggingface.co/openai/gpt-oss-20b) comes in two flavors: +* [`gpt-oss-20b`](https://huggingface.co/openai/gpt-oss-20b) - 21B parameters with 3.6B active parameters +* [`gpt-oss-120b`](https://huggingface.co/openai/gpt-oss-120b) - 117B parameters with 5.1B active parameters + +Here are a few highlights of these models: +* Due to the low number of active parameters, these models are very fast +* These are reasoning models, and you can adjust their reasoning efforts +* They are very good at function calling, and are built with agentic capabilities in mind +* These models were trained with native MXFP4 precision, so no need to quantize them further. 
+  They're already small relative to their capabilities
+* They are released under the Apache 2.0 license, so you can use them in your commercial applications
+
+
+## Recommended Models
+Here are some recommended model URIs you can use to try out `gpt-oss` right away:
+| Model                                                              | Size   | URI                                                                   |
+|--------------------------------------------------------------------|--------|-----------------------------------------------------------------------|
+| [`gpt-oss-20b`](https://huggingface.co/giladgd/gpt-oss-20b-GGUF)   | 12.1GB | `hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf`                  |
+| [`gpt-oss-120b`](https://huggingface.co/giladgd/gpt-oss-120b-GGUF) | 63.4GB | `hf:giladgd/gpt-oss-120b-GGUF/gpt-oss-120b.MXFP4-00001-of-00002.gguf` |
+
+::: info TIP
+[Estimate the compatibility](../cli/inspect/estimate.md) of a model with your machine before downloading it:
+```shell
+npx -y node-llama-cpp inspect estimate <model URI>
+```
+:::
+
+
+### Try It Using the CLI
+To quickly try out [`gpt-oss-20b`](https://huggingface.co/giladgd/gpt-oss-20b-GGUF), you can use the [CLI `chat` command](../cli/chat.md):
+
+```shell
+npx -y node-llama-cpp chat --ef --prompt "Hi there" hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf
+```
+
+
+## Customizing gpt-oss
+You can adjust `gpt-oss`'s responses by configuring the options of [`HarmonyChatWrapper`](../api/classes/HarmonyChatWrapper.md):
+```typescript
+import {
+    getLlama, resolveModelFile, LlamaChatSession,
+    HarmonyChatWrapper
+} from "node-llama-cpp";
+
+const modelUri = "hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf";
+
+
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: await resolveModelFile(modelUri)
+});
+const context = await model.createContext();
+const session = new LlamaChatSession({
+    contextSequence: context.getSequence(),
+    chatWrapper: new HarmonyChatWrapper({
+        modelIdentity: "You are ChatGPT, a large language model trained by OpenAI.",
+        reasoningEffort: "high"
+    })
+});
+
+const q1 = "What is the weather like in SF?";
+console.log("User: " + q1);
+
+const a1 = await session.prompt(q1);
+console.log("AI: " + a1);
+```
+
+### Using Function Calling
+`gpt-oss` models have great support for function calling.
+However, these models don't support parallel function calling, so only one function will be called at a time.
+
+```typescript
+import {
+    getLlama, resolveModelFile, LlamaChatSession,
+    defineChatSessionFunction
+} from "node-llama-cpp";
+
+const modelUri = "hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf";
+
+
+const llama = await getLlama();
+const model = await llama.loadModel({
+    modelPath: await resolveModelFile(modelUri)
+});
+const context = await model.createContext();
+const session = new LlamaChatSession({
+    contextSequence: context.getSequence()
+});
+
+const functions = {
+    getCurrentWeather: defineChatSessionFunction({
+        description: "Gets the current weather in the provided location.",
+        params: {
+            type: "object",
+            properties: {
+                location: {
+                    type: "string",
+                    description: "The city and state, e.g. San Francisco, CA"
+                },
+                format: {
+                    enum: ["celsius", "fahrenheit"]
+                }
+            }
+        },
+        handler({location, format}) {
+            console.log(`Getting current weather for "${location}" in ${format}`);
+
+            return {
+                // simulate a weather API response
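+                // 20°C and 68°F describe the same temperature,
+                // so both formats report an equivalent simulated value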
+                temperature: format === "celsius" ? 20 : 68,
+                format
+            };
+        }
+    })
+};
+
+const q1 = "What is the weather like in SF?";
+console.log("User: " + q1);
+
+const a1 = await session.prompt(q1, {functions});
+console.log("AI: " + a1);
+```
diff --git a/package.json b/package.json
index d5a819e3..c1bdcb39 100644
--- a/package.json
+++ b/package.json
@@ -113,6 +113,8 @@
         "deepseek",
         "qwen",
         "qwq",
+        "gpt",
+        "gpt-oss",
         "typescript",
         "lora",
         "batching",
diff --git a/src/cli/recommendedModels.ts b/src/cli/recommendedModels.ts
index 1609a780..458c9b3b 100644
--- a/src/cli/recommendedModels.ts
+++ b/src/cli/recommendedModels.ts
@@ -1,6 +1,30 @@
 import {ModelRecommendation} from "./utils/resolveModelRecommendationFileOptions.js";
 
 export const recommendedModels: ModelRecommendation[] = [{
+    name: "gpt-oss 20B",
+    abilities: ["chat", "complete", "functionCalling", "reasoning"],
+    description: "gpt-oss models were created by OpenAI and use chain of thought (CoT) to reason across a wide variety of topics, and utilize a Mixture of Experts architecture.\n" +
+        "They're optimized for agentic use cases, with native support for function calling.\n" +
+        "Mixture of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to improve the overall performance on complex tasks.\n" +
+        "This model only has 3.6B active parameters, making it very fast.\n" +
+        "This is the 20 billion parameters version of the model.",
+
+    fileOptions: [
+        "hf:giladgd/gpt-oss-20b-GGUF/gpt-oss-20b.MXFP4.gguf"
+    ]
+}, {
+    name: "gpt-oss 120B",
+    abilities: ["chat", "complete", "functionCalling", "reasoning"],
+    description: "gpt-oss models were created by OpenAI and use chain of thought (CoT) to reason across a wide variety of topics, and utilize a Mixture of Experts architecture.\n" +
+        "They're optimized for agentic use cases, with native support for function calling.\n" +
+        "Mixture of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to improve the overall performance on complex tasks.\n" +
+        "This model only has 5.1B active parameters, making it very fast.\n" +
+        "This is the 120 billion parameters version of the model.",
+
+    fileOptions: [
+        "hf:giladgd/gpt-oss-120b-GGUF/gpt-oss-120b.MXFP4-00001-of-00002.gguf"
+    ]
+}, {
     name: "Qwen 3 32B",
     abilities: ["chat", "complete", "functionCalling", "reasoning"],
     description: "Qwen model was created by Alibaba and is using chain of though (CoT) to reason across a wide variety of topics.\n" +

From b9a87674fc8148e4ffd4801d61c0aba6030dca3a Mon Sep 17 00:00:00 2001
From: Gilad S
Date: Sat, 9 Aug 2025 15:08:19 +0300
Subject: [PATCH 08/10] docs: typo

---
 docs/blog/v3.12-gpt-oss.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/blog/v3.12-gpt-oss.md b/docs/blog/v3.12-gpt-oss.md
index a84d9c0f..e3e17500 100644
--- a/docs/blog/v3.12-gpt-oss.md
+++ b/docs/blog/v3.12-gpt-oss.md
@@ -1,6 +1,6 @@
 ---
 title: gpt-oss is here!
-date: 2025-08-09T20:00:00Z
+date: 2025-08-09T15:00:00Z
 lastUpdated: false
 author:
     name: Gilad S.
From 2171284e8969b0a2e1aa9b6cf2e6a8295f01d7b5 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 9 Aug 2025 15:13:22 +0300 Subject: [PATCH 09/10] docs: update links --- .vitepress/theme/index.ts | 6 +++--- README.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.vitepress/theme/index.ts b/.vitepress/theme/index.ts index c1119d10..de7759d7 100644 --- a/.vitepress/theme/index.ts +++ b/.vitepress/theme/index.ts @@ -19,9 +19,9 @@ import type {EnhanceAppContext} from "vitepress"; export default { extends: Theme, Layout: () => { - const text = "DeepSeek R1 is here!"; - const link = "/blog/v3.6-deepseek-r1"; - const hideDate = new Date("2025-06-01T00:00:00Z"); + const text = "gpt-oss is here!"; + const link = "/blog/v3.12-gpt-oss"; + const hideDate = new Date("2025-11-01T00:00:00Z"); return h(LayoutContainer, null, h(Theme.Layout, null, { "home-hero-info-before": () => h(LatestVersionHomeBadge, { diff --git a/README.md b/README.md index 47508919..f7a33bb3 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ -✨ [DeepSeek R1 is here!](https://node-llama-cpp.withcat.ai/blog/v3.6-deepseek-r1) ✨ +✨ [`gpt-oss` is here!](https://node-llama-cpp.withcat.ai/blog/v3.12-gpt-oss) ✨ ## Features * Run LLMs locally on your machine From 9f8d27b3e38841c716638ddbf9ae38df1808d072 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sat, 9 Aug 2025 20:19:47 +0300 Subject: [PATCH 10/10] fix: Qwen3 memory estimation --- docs/blog/v3.12-gpt-oss.md | 2 +- src/gguf/insights/GgufInsights.ts | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/blog/v3.12-gpt-oss.md b/docs/blog/v3.12-gpt-oss.md index e3e17500..9a09fa13 100644 --- a/docs/blog/v3.12-gpt-oss.md +++ b/docs/blog/v3.12-gpt-oss.md @@ -1,6 +1,6 @@ --- title: gpt-oss is here! -date: 2025-08-09T15:00:00Z +date: 2025-08-09T18:00:00Z lastUpdated: false author: name: Gilad S. diff --git a/src/gguf/insights/GgufInsights.ts b/src/gguf/insights/GgufInsights.ts index 53895600..f32ceb0d 100644 --- a/src/gguf/insights/GgufInsights.ts +++ b/src/gguf/insights/GgufInsights.ts @@ -310,6 +310,8 @@ export class GgufInsights { // ) // ); // } + } else if (this._ggufFileInfo.metadata.general?.architecture === GgufArchitectureType.qwen3) { + return int32TBytes * batchSize * (embeddingLength + (kvSize * headCount)); } else if (expertCount > 0) { const expertsUsedCount = this._ggufFileInfo.architectureMetadata.expert_used_count ?? 2;