import {ChatWrapper} from "../ChatWrapper.js";
import {
    ChatModelFunctions, ChatWrapperGenerateContextStateOptions, ChatWrapperGeneratedContextState, ChatWrapperSettings,
    isChatModelResponseSegment
} from "../types.js";
import {SpecialToken, LlamaText, SpecialTokensText} from "../utils/LlamaText.js";
import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctionsDocumentationGenerator.js";

const defaultThinkingBudget = null;

// source: https://huggingface.co/ByteDance-Seed/Seed-OSS-36B-Instruct/blob/main/chat_template.jinja
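// Renders the chat history with `<seed:bos>ROLE` / `<seed:eos>` role markers, documents the
// available tools in a system message, and optionally instructs the model to follow a thinking budget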
export class SeedChatWrapper extends ChatWrapper {
    public readonly wrapperName: string = "Seed";

    public readonly thinkingBudget: number | 0 | null;

    public override readonly settings: ChatWrapperSettings = {
        supportsSystemMessages: true,
        functions: {
            call: {
                optionalPrefixSpace: true,
                prefix: LlamaText(new SpecialTokensText("<seed:tool_call>\n"), "<function="),
                paramsPrefix: LlamaText(new SpecialTokensText(">")),
                suffix: LlamaText(new SpecialTokensText("\n</function>\n</seed:tool_call>\n")),
                emptyCallParamsPlaceholder: {}
            },
            result: {
                prefix: LlamaText(new SpecialTokensText("<seed:bos>tool\n")),
                suffix: LlamaText(new SpecialTokensText("<seed:eos>"))
            }
        },
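        // Seed-OSS emits its chain of thought inside `<seed:think>` tags; mapping them to a
        // "thought" segment lets callers stream or hide the reasoning, and
        // `reopenAfterFunctionCalls` resumes the thought after a tool call resolves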
        segments: {
            thought: {
                prefix: LlamaText(new SpecialTokensText("<seed:think>")),
                suffix: LlamaText(new SpecialTokensText("</seed:think>")),
                reopenAfterFunctionCalls: true
            }
        }
    };

    public constructor(options: {
        /**
         * The thinking budget to instruct the model to conform to.
         *
         * This is purely a request; the model may ignore it.
         *
         * Set to `0` to instruct the model to not use any reasoning.
         *
         * When set to `null`, the instruction will be omitted (unlimited reasoning).
         *
         * Defaults to `null`.
         */
        thinkingBudget?: number | 0 | null
    } = {}) {
        super();

        const {
            thinkingBudget = defaultThinkingBudget
        } = options;

        this.thinkingBudget = thinkingBudget;
    }

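    // Illustrative shape of the rendered context (assuming a system prompt, one user
    // message, and a model turn; the last item omits the trailing `<seed:eos>` so that
    // generation can continue from it):
    //   <seed:bos>system
    //   ...system prompt and tool list...
    //   <seed:eos><seed:bos>user
    //   ...user message...
    //   <seed:eos><seed:bos>assistant
    //   ...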
    public override generateContextState({
        chatHistory, availableFunctions, documentFunctionParams
    }: ChatWrapperGenerateContextStateOptions): ChatWrapperGeneratedContextState {
        const hasFunctions = Object.keys(availableFunctions ?? {}).length > 0;
        const modifiedChatHistory = chatHistory.slice();

        let systemMessage: LlamaText = LlamaText();
        if (modifiedChatHistory[0]?.type === "system") {
            systemMessage = LlamaText.fromJSON(modifiedChatHistory[0].text);
            modifiedChatHistory.shift();
        }

        const contextContent: LlamaText[] = [];

        if (systemMessage.values.length > 0 || hasFunctions)
            contextContent.push(
                LlamaText([
                    new SpecialTokensText("<seed:bos>system\n"),
                    this._getFirstSystemMessage(systemMessage, availableFunctions, {documentParams: documentFunctionParams}),
                    new SpecialTokensText("\n<seed:eos>")
                ])
            );

        const thinkingBudgetSystemMessage = this._getThinkingBudgetSystemMessage();
        if (thinkingBudgetSystemMessage.values.length > 0)
            contextContent.push(
                LlamaText([
                    new SpecialTokensText("<seed:bos>system\n"),
                    thinkingBudgetSystemMessage,
                    new SpecialTokensText("\n<seed:eos>")
                ])
            );

        for (let i = 0; i < modifiedChatHistory.length; i++) {
            const isLastItem = i === modifiedChatHistory.length - 1;
            const item = modifiedChatHistory[i];

            if (item == null)
                continue;

            if (item.type === "system") {
                contextContent.push(
                    LlamaText([
                        new SpecialTokensText("<seed:bos>system\n"),
                        LlamaText.fromJSON(item.text),
                        isLastItem
                            ? LlamaText([])
                            : new SpecialTokensText("\n<seed:eos>")
                    ])
                );
            } else if (item.type === "user") {
                contextContent.push(
                    LlamaText([
                        new SpecialTokensText("<seed:bos>user\n"),
                        item.text,
                        isLastItem
                            ? LlamaText([])
                            : new SpecialTokensText("\n<seed:eos>")
                    ])
                );
            } else if (item.type === "model") {
                // when the thinking budget is 0, inject a stub thought telling the model to answer
                // immediately, unless this response already contains a thought of its own
                const injectNoThinkingThought = this.thinkingBudget === 0 && (
                    isLastItem ||
                    !item.response.some(
                        (response) => (
                            isChatModelResponseSegment(response) && response.segmentType === "thought"
                        )
                    )
                );

                contextContent.push(
                    LlamaText([
                        new SpecialTokensText("<seed:bos>assistant\n"),
                        injectNoThinkingThought
                            ? [
                                new SpecialTokensText("<seed:think>\n"),
                                [
                                    new SpecialTokensText("<seed:cot_budget_reflect>"),
                                    "The current thinking budget is 0, so I will directly start answering the question.",
                                    new SpecialTokensText("</seed:cot_budget_reflect>")
                                ],
                                new SpecialTokensText("\n</seed:think>")
                            ]
                            : [],
                        this.generateModelResponseText(item.response, true),
                        isLastItem
                            ? LlamaText([])
                            : new SpecialTokensText("\n<seed:eos>")
                    ])
                );
            } else
                void (item satisfies never);
        }

        const contextText = LlamaText(contextContent);

        return {
            contextText,
            stopGenerationTriggers: [
                LlamaText(new SpecialToken("EOS")),
                LlamaText(new SpecialTokensText("<seed:eos>")),
                LlamaText("<seed:eos>")
            ]
        };
    }

    public override generateAvailableFunctionsSystemText(availableFunctions: ChatModelFunctions, {documentParams = true}: {
        documentParams?: boolean
    }) {
        const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);

        if (!functionsDocumentationGenerator.hasAnyFunctions)
            return LlamaText([]);

        return LlamaText.joinValues("\n", [
            "",
            "Tool List:",
            (
                "You are authorized to use the following tools (described in JSON Schema format). " +
                "Before performing any task, you must decide how to call them based on the descriptions and parameters of these tools."
            ),
            functionsDocumentationGenerator.getSeedFunctionSignatures({documentParams}),
            "When invoking tools, strictly adhere to the following format:", // the original text for this is in Chinese, translated to English here
            new SpecialTokensText("<seed:tool_call>\n<function=example_function_name>\n{\"example_parameter_1\": \"value_1\", \"example_parameter_2\": \"This is the value for the second parameter\"}</function>\n</seed:tool_call>")
        ]);
    }

    /** @internal */
    private _getFirstSystemMessage(
        systemPrompt: LlamaText,
        availableFunctions?: ChatModelFunctions,
        {documentParams = true}: {documentParams?: boolean} = {}
    ) {
        const res: LlamaText[] = [];

        const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);

        if (systemPrompt.values.length === 0 && functionsDocumentationGenerator.hasAnyFunctions)
            res.push(
                LlamaText("You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query.")
            );
        else if (systemPrompt.values.length > 0)
            res.push(systemPrompt);

        if (functionsDocumentationGenerator.hasAnyFunctions)
            res.push(this.generateAvailableFunctionsSystemText(availableFunctions!, {documentParams}));

        return LlamaText(res);
    }

    /** @internal */
    private _getThinkingBudgetSystemMessage() {
        if (this.thinkingBudget == null || this.thinkingBudget < 0)
            return LlamaText([]);

        if (this.thinkingBudget === 0)
            return LlamaText([
                "You are an intelligent assistant that can answer questions in one step without the need for reasoning and thinking, " +
                "that is, your thinking budget is 0. " +
                "Next, please skip the thinking process and directly start answering the user's questions."
            ]);

        let reflectionInterval: number = 1024;
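        // maps a thinking budget ceiling to how often the model is asked to reflect on its
        // remaining budget; budgets above 16384 keep the default interval of 1024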
        const reflectionIntervals = new Map<number, number>([
            [16384, 1024],
            [8192, 1024],
            [4096, 512],
            [2048, 512],
            [1024, 256],
            [512, 128],
            [0, 0]
        ]);
        // entries are ordered from the largest budget ceiling to the smallest:
        // keep tightening the interval while the budget still fits,
        // and stop at the first ceiling the budget exceeds
        for (const [maxBudget, interval] of reflectionIntervals.entries()) {
            if (this.thinkingBudget <= maxBudget) {
                reflectionInterval = interval;
                continue;
            }

            break;
        }

        // note: the caller wraps this message with `<seed:bos>system` / `<seed:eos>`
        return LlamaText([
            "You are an intelligent assistant with reflective ability. In the process of thinking and reasoning, you need to strictly follow the thinking budget, which is ",
            this.thinkingBudget,
            ". That is, you need to complete your thinking within ",
            this.thinkingBudget,
            " tokens and start answering the user's questions. You will reflect on your thinking process every ",
            reflectionInterval,
            " tokens, stating how many tokens have been used and how many are left."
        ]);
    }
}
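
/*
 * Usage sketch (illustrative; assumes node-llama-cpp's `getLlama` and `LlamaChatSession`
 * APIs, and a hypothetical local model file path):
 *
 *     import {getLlama, LlamaChatSession} from "node-llama-cpp";
 *
 *     const llama = await getLlama();
 *     const model = await llama.loadModel({modelPath: "path/to/seed-oss-model.gguf"});
 *     const context = await model.createContext();
 *     const session = new LlamaChatSession({
 *         contextSequence: context.getSequence(),
 *         // request up to ~512 tokens of reasoning; set 0 to skip reasoning entirely
 *         chatWrapper: new SeedChatWrapper({thinkingBudget: 512})
 *     });
 *
 *     console.log(await session.prompt("How many r's are in 'strawberry'?"));
 */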