@@ -606,6 +606,7 @@ const char * common_chat_format_name(common_chat_format format) {
606606 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return " Functionary v3.1 Llama 3.1" ;
607607 case COMMON_CHAT_FORMAT_HERMES_2_PRO: return " Hermes 2 Pro" ;
608608 case COMMON_CHAT_FORMAT_COMMAND_R7B: return " Command R7B" ;
609+ case COMMON_CHAT_FORMAT_GRANITE: return " Granite" ;
609610 default :
610611 throw std::runtime_error (" Unknown chat format" );
611612 }
@@ -616,6 +617,7 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
616617 case COMMON_REASONING_FORMAT_NONE: return " none" ;
617618 case COMMON_REASONING_FORMAT_DEEPSEEK: return " deepseek" ;
618619 case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return " deepseek-legacy" ;
620+ case COMMON_REASONING_FORMAT_GRANITE: return " granite" ;
619621 default :
620622 throw std::runtime_error (" Unknown reasoning format" );
621623 }
@@ -1712,6 +1714,124 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
17121714 builder.add_content (builder.consume_rest ());
17131715}
17141716
1717+ static common_chat_params common_chat_params_init_granite (const common_chat_template & tmpl, const struct templates_params & inputs) {
1718+ common_chat_params data;
1719+
1720+ // Pass thinking context for Granite template
1721+ json additional_context = {
1722+ {" thinking" , inputs.enable_thinking },
1723+ };
1724+
1725+ data.prompt = apply (tmpl, inputs, /* messages_override= */ std::nullopt , /* tools_override= */ std::nullopt , additional_context);
1726+ data.format = COMMON_CHAT_FORMAT_GRANITE;
1727+
1728+ if (string_ends_with (data.prompt , " <think>\n " ) || string_ends_with (data.prompt , " <think>" )) {
1729+ if (!inputs.enable_thinking ) {
1730+ data.prompt += " </think>" ;
1731+ } else {
1732+ data.thinking_forced_open = true ;
1733+ }
1734+ }
1735+
1736+ if (!inputs.tools .is_null ()) {
1737+ // Granite uses <|tool_call|> followed by JSON list
1738+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1739+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1740+ std::vector<std::string> tool_rules;
1741+ foreach_function (inputs.tools , [&](const json & tool) {
1742+ const auto & function = tool.at (" function" );
1743+ std::string name = function.at (" name" );
1744+ auto parameters = function.at (" parameters" );
1745+ builder.resolve_refs (parameters);
1746+ tool_rules.push_back (builder.add_rule (name + " -call" , builder.add_schema (name +
1747+ " -args" , {
1748+ {" type" , " object" },
1749+ {" properties" , {
1750+ {" name" , {{" const" , name}}},
1751+ {" arguments" , parameters},
1752+ }},
1753+ {" required" , json::array ({" name" , " arguments" })},
1754+ })));
1755+ });
1756+
1757+ auto tool_call = builder.add_rule (" tool_call" , string_join (tool_rules, " | " ));
1758+ auto tool_list = builder.add_rule (" tool_list" , " \" [\" space " + tool_call + " (\" ,\" space " + tool_call + " )* space \" ]\" " );
1759+
1760+ if (data.thinking_forced_open ) {
1761+ builder.add_rule (" root" , " \" </think>\" space \" <response>\" space [^<]* \" </response>\" space \" <|tool_call|>\" space " + tool_list);
1762+ } else {
1763+ builder.add_rule (" root" , " \" <|tool_call|>\" space " + tool_list);
1764+ }
1765+
1766+ data.grammar_triggers .push_back ({
1767+ COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1768+ " <|tool_call|>"
1769+ });
1770+
1771+ data.preserved_tokens = {
1772+ " <think>" ,
1773+ " </think>" ,
1774+ " <response>" ,
1775+ " </response>" ,
1776+ " <|tool_call|>" ,
1777+ };
1778+ });
1779+ } else {
1780+ // Handle thinking tags for non-tool responses
1781+ if (data.thinking_forced_open && inputs.enable_thinking ) {
1782+ data.grammar_lazy = false ;
1783+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1784+ builder.add_rule (" root" , " \" </think>\" space \" <response>\" space .* \" </response>\" space" );
1785+ });
1786+ data.preserved_tokens = {
1787+ " <think>" ,
1788+ " </think>" ,
1789+ " <response>" ,
1790+ " </response>" ,
1791+ };
1792+ }
1793+ }
1794+
1795+ return data;
1796+ }
1797+
1798+ static void common_chat_parse_granite (common_chat_msg_parser & builder) {
1799+ // Parse thinking tags
1800+ builder.try_parse_reasoning (" <think>" , " </think>" );
1801+
1802+ // Parse response tags using regex
1803+ static const common_regex response_regex (" <response>([\\ s\\ S]*?)</response>" );
1804+ if (auto res = builder.try_find_regex (response_regex)) {
1805+ // Extract the content between the tags (capture group 1)
1806+ auto content = builder.str (res->groups [1 ]);
1807+ builder.add_content (content);
1808+ builder.move_to (res->groups [0 ].end );
1809+ }
1810+
1811+ if (!builder.syntax ().parse_tool_calls ) {
1812+ builder.add_content (builder.consume_rest ());
1813+ return ;
1814+ }
1815+
1816+ // Look for tool calls
1817+ static const common_regex tool_call_regex (regex_escape (" <|tool_call|>" ));
1818+ if (auto res = builder.try_find_regex (tool_call_regex)) {
1819+ builder.move_to (res->groups [0 ].end );
1820+
1821+ // Expect JSON array of tool calls
1822+ auto tool_calls_data = builder.consume_json ();
1823+ if (tool_calls_data.json .is_array ()) {
1824+ if (!builder.add_tool_calls (tool_calls_data.json )) {
1825+ builder.add_content (" <|tool_call|>" + tool_calls_data.json .dump ());
1826+ }
1827+ } else {
1828+ builder.add_content (" <|tool_call|>" + tool_calls_data.json .dump ());
1829+ }
1830+ } else {
1831+ builder.add_content (builder.consume_rest ());
1832+ }
1833+ }
1834+
17151835static common_chat_params common_chat_params_init_without_tools (const common_chat_template & tmpl, const struct templates_params & inputs) {
17161836 common_chat_params data;
17171837 data.prompt = apply (tmpl, inputs);
@@ -1783,6 +1903,11 @@ static common_chat_params common_chat_templates_apply_jinja(
17831903 return common_chat_params_init_command_r7b (tmpl, params);
17841904 }
17851905
1906+ // Granite (IBM) - detects thinking / tools support
1907+ if (src.find (" elif thinking" ) != std::string::npos && src.find (" <|tool_call|>" ) != std::string::npos) {
1908+ return common_chat_params_init_granite (tmpl, params);
1909+ }
1910+
17861911 // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
17871912 if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null ()) {
17881913 return common_chat_params_init_hermes_2_pro (tmpl, params);
@@ -1838,6 +1963,7 @@ static common_chat_params common_chat_templates_apply_legacy(
18381963 int alloc_size = 0 ;
18391964 std::vector<llama_chat_message> chat;
18401965 std::vector<std::string> contents;
1966+
18411967 for (const auto & msg : inputs.messages ) {
18421968 auto content = msg.content ;
18431969 for (const auto & part : msg.content_parts ) {
@@ -1939,6 +2065,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
19392065 case COMMON_CHAT_FORMAT_COMMAND_R7B:
19402066 common_chat_parse_command_r7b (builder);
19412067 break ;
2068+ case COMMON_CHAT_FORMAT_GRANITE:
2069+ common_chat_parse_granite (builder);
2070+ break ;
19422071 default :
19432072 throw std::runtime_error (std::string (" Unsupported format: " ) + common_chat_format_name (builder.syntax ().format ));
19442073 }
0 commit comments