@@ -606,6 +606,7 @@ const char * common_chat_format_name(common_chat_format format) {
606606 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return " Functionary v3.1 Llama 3.1" ;
607607 case COMMON_CHAT_FORMAT_HERMES_2_PRO: return " Hermes 2 Pro" ;
608608 case COMMON_CHAT_FORMAT_COMMAND_R7B: return " Command R7B" ;
609+ case COMMON_CHAT_FORMAT_GRANITE: return " Granite" ;
609610 case COMMON_CHAT_FORMAT_GPT_OSS: return " GPT-OSS" ;
610611 default :
611612 throw std::runtime_error (" Unknown chat format" );
@@ -618,6 +619,7 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
618619 case COMMON_REASONING_FORMAT_AUTO: return " auto" ;
619620 case COMMON_REASONING_FORMAT_DEEPSEEK: return " deepseek" ;
620621 case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return " deepseek-legacy" ;
622+ case COMMON_REASONING_FORMAT_GRANITE: return " granite" ;
621623 default :
622624 throw std::runtime_error (" Unknown reasoning format" );
623625 }
@@ -1734,6 +1736,124 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
17341736 builder.add_content (builder.consume_rest ());
17351737}
17361738
1739+ static common_chat_params common_chat_params_init_granite (const common_chat_template & tmpl, const struct templates_params & inputs) {
1740+ common_chat_params data;
1741+
1742+ // Pass thinking context for Granite template
1743+ json additional_context = {
1744+ {" thinking" , inputs.enable_thinking },
1745+ };
1746+
1747+ data.prompt = apply (tmpl, inputs, /* messages_override= */ std::nullopt , /* tools_override= */ std::nullopt , additional_context);
1748+ data.format = COMMON_CHAT_FORMAT_GRANITE;
1749+
1750+ if (string_ends_with (data.prompt , " <think>\n " ) || string_ends_with (data.prompt , " <think>" )) {
1751+ if (!inputs.enable_thinking ) {
1752+ data.prompt += " </think>" ;
1753+ } else {
1754+ data.thinking_forced_open = true ;
1755+ }
1756+ }
1757+
1758+ if (!inputs.tools .is_null ()) {
1759+ // Granite uses <|tool_call|> followed by JSON list
1760+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1761+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1762+ std::vector<std::string> tool_rules;
1763+ foreach_function (inputs.tools , [&](const json & tool) {
1764+ const auto & function = tool.at (" function" );
1765+ std::string name = function.at (" name" );
1766+ auto parameters = function.at (" parameters" );
1767+ builder.resolve_refs (parameters);
1768+ tool_rules.push_back (builder.add_rule (name + " -call" , builder.add_schema (name +
1769+ " -args" , {
1770+ {" type" , " object" },
1771+ {" properties" , {
1772+ {" name" , {{" const" , name}}},
1773+ {" arguments" , parameters},
1774+ }},
1775+ {" required" , json::array ({" name" , " arguments" })},
1776+ })));
1777+ });
1778+
1779+ auto tool_call = builder.add_rule (" tool_call" , string_join (tool_rules, " | " ));
1780+ auto tool_list = builder.add_rule (" tool_list" , " \" [\" space " + tool_call + " (\" ,\" space " + tool_call + " )* space \" ]\" " );
1781+
1782+ if (data.thinking_forced_open ) {
1783+ builder.add_rule (" root" , " \" </think>\" space \" <response>\" space [^<]* \" </response>\" space \" <|tool_call|>\" space " + tool_list);
1784+ } else {
1785+ builder.add_rule (" root" , " \" <|tool_call|>\" space " + tool_list);
1786+ }
1787+
1788+ data.grammar_triggers .push_back ({
1789+ COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1790+ " <|tool_call|>"
1791+ });
1792+
1793+ data.preserved_tokens = {
1794+ " <think>" ,
1795+ " </think>" ,
1796+ " <response>" ,
1797+ " </response>" ,
1798+ " <|tool_call|>" ,
1799+ };
1800+ });
1801+ } else {
1802+ // Handle thinking tags for non-tool responses
1803+ if (data.thinking_forced_open && inputs.enable_thinking ) {
1804+ data.grammar_lazy = false ;
1805+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1806+ builder.add_rule (" root" , " \" </think>\" space \" <response>\" space .* \" </response>\" space" );
1807+ });
1808+ data.preserved_tokens = {
1809+ " <think>" ,
1810+ " </think>" ,
1811+ " <response>" ,
1812+ " </response>" ,
1813+ };
1814+ }
1815+ }
1816+
1817+ return data;
1818+ }
1819+
1820+ static void common_chat_parse_granite (common_chat_msg_parser & builder) {
1821+ // Parse thinking tags
1822+ builder.try_parse_reasoning (" <think>" , " </think>" );
1823+
1824+ // Parse response tags using regex
1825+ static const common_regex response_regex (" <response>([\\ s\\ S]*?)</response>" );
1826+ if (auto res = builder.try_find_regex (response_regex)) {
1827+ // Extract the content between the tags (capture group 1)
1828+ auto content = builder.str (res->groups [1 ]);
1829+ builder.add_content (content);
1830+ builder.move_to (res->groups [0 ].end );
1831+ }
1832+
1833+ if (!builder.syntax ().parse_tool_calls ) {
1834+ builder.add_content (builder.consume_rest ());
1835+ return ;
1836+ }
1837+
1838+ // Look for tool calls
1839+ static const common_regex tool_call_regex (regex_escape (" <|tool_call|>" ));
1840+ if (auto res = builder.try_find_regex (tool_call_regex)) {
1841+ builder.move_to (res->groups [0 ].end );
1842+
1843+ // Expect JSON array of tool calls
1844+ auto tool_calls_data = builder.consume_json ();
1845+ if (tool_calls_data.json .is_array ()) {
1846+ if (!builder.add_tool_calls (tool_calls_data.json )) {
1847+ builder.add_content (" <|tool_call|>" + tool_calls_data.json .dump ());
1848+ }
1849+ } else {
1850+ builder.add_content (" <|tool_call|>" + tool_calls_data.json .dump ());
1851+ }
1852+ } else {
1853+ builder.add_content (builder.consume_rest ());
1854+ }
1855+ }
1856+
17371857static common_chat_params common_chat_params_init_without_tools (const common_chat_template & tmpl, const struct templates_params & inputs) {
17381858 common_chat_params data;
17391859 data.prompt = apply (tmpl, inputs);
@@ -1805,6 +1925,11 @@ static common_chat_params common_chat_templates_apply_jinja(
18051925 return common_chat_params_init_command_r7b (tmpl, params);
18061926 }
18071927
1928+ // Granite (IBM) - detects thinking / tools support
1929+ if (src.find (" elif thinking" ) != std::string::npos && src.find (" <|tool_call|>" ) != std::string::npos) {
1930+ return common_chat_params_init_granite (tmpl, params);
1931+ }
1932+
18081933 // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
18091934 if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null ()) {
18101935 return common_chat_params_init_hermes_2_pro (tmpl, params);
@@ -1865,6 +1990,7 @@ static common_chat_params common_chat_templates_apply_legacy(
18651990 int alloc_size = 0 ;
18661991 std::vector<llama_chat_message> chat;
18671992 std::vector<std::string> contents;
1993+
18681994 for (const auto & msg : inputs.messages ) {
18691995 auto content = msg.content ;
18701996 for (const auto & part : msg.content_parts ) {
@@ -1966,6 +2092,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
19662092 case COMMON_CHAT_FORMAT_COMMAND_R7B:
19672093 common_chat_parse_command_r7b (builder);
19682094 break ;
2095+ case COMMON_CHAT_FORMAT_GRANITE:
2096+ common_chat_parse_granite (builder);
2097+ break ;
19692098 case COMMON_CHAT_FORMAT_GPT_OSS:
19702099 common_chat_parse_gpt_oss (builder);
19712100 break ;
0 commit comments