
Commit 13a8ecc

Adding support for the Granite chat template. Updating the xcframework build to include libcommon.a so that chat template processing is exposed.
1 parent a12363b commit 13a8ecc

File tree

3 files changed: +132 -0 lines changed


build-xcframework.sh

Lines changed: 4 additions & 0 deletions
@@ -116,6 +116,7 @@ setup_framework_structure() {
 
     # Copy all required headers (common for all platforms)
     cp include/llama.h ${header_path}
+    cp include/llama-cpp.h ${header_path}
     cp ggml/include/ggml.h ${header_path}
     cp ggml/include/ggml-opt.h ${header_path}
     cp ggml/include/ggml-alloc.h ${header_path}
@@ -124,6 +125,8 @@ setup_framework_structure() {
     cp ggml/include/ggml-cpu.h ${header_path}
     cp ggml/include/ggml-blas.h ${header_path}
     cp ggml/include/gguf.h ${header_path}
+    cp common/common.h ${header_path}
+    cp common/chat.h ${header_path}
 
     # Create module map (common for all platforms)
     cat > ${module_path}module.modulemap << EOF
@@ -247,6 +250,7 @@ combine_static_libraries() {
 
     local libs=(
         "${base_dir}/${build_dir}/src/${release_dir}/libllama.a"
+        "${base_dir}/${build_dir}/common/${release_dir}/libcommon.a"
         "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml.a"
         "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-base.a"
         "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a"

common/chat.cpp

Lines changed: 127 additions & 0 deletions
@@ -1700,6 +1700,125 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
     }
 }
 
+static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
+    common_chat_params data;
+
+    // Pass thinking context for Granite template
+    json additional_context = {
+        {"thinking", inputs.enable_thinking},
+    };
+
+    data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context);
+
+    if (string_ends_with(data.prompt, "<think>\n") || string_ends_with(data.prompt, "<think>")) {
+        if (!inputs.enable_thinking) {
+            data.prompt += "</think>";
+        } else {
+            data.thinking_forced_open = true;
+        }
+    }
+
+    if (!inputs.tools.is_null()) {
+        data.format = COMMON_CHAT_FORMAT_GRANITE;
+        // Granite uses <|tool_call|> followed by JSON list
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            std::vector<std::string> tool_rules;
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+                builder.resolve_refs(parameters);
+                tool_rules.push_back(builder.add_rule(name + "-call", builder.add_schema(name +
+                    "-args", {
+                    {"type", "object"},
+                    {"properties", {
+                        {"name", {{"const", name}}},
+                        {"arguments", parameters},
+                    }},
+                    {"required", json::array({"name", "arguments"})},
+                })));
+            });
+
+            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
+            auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\"");
+
+            if (data.thinking_forced_open) {
+                builder.add_rule("root", "\"</think>\" space \"<response>\" space [^<]* \"</response>\" space \"<|tool_call|>\" space " + tool_list);
+            } else {
+                builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list);
+            }
+
+            data.grammar_triggers.push_back({
+                COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
+                "<|tool_call|>"
+            });
+
+            data.preserved_tokens = {
+                "<think>",
+                "</think>",
+                "<response>",
+                "</response>",
+                "<|tool_call|>",
+            };
+        });
+    } else {
+        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+        // Handle thinking tags for non-tool responses
+        if (data.thinking_forced_open && inputs.enable_thinking) {
+            data.grammar_lazy = false;
+            data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+                builder.add_rule("root", "\"</think>\" space \"<response>\" space .* \"</response>\" space");
+            });
+            data.preserved_tokens = {
+                "<think>",
+                "</think>",
+                "<response>",
+                "</response>",
+            };
+        }
+    }
+
+    return data;
+}
+
+static void common_chat_parse_granite(common_chat_msg_parser & builder) {
+    // Parse thinking tags
+    builder.try_parse_reasoning("<think>", "</think>");
+
+    // Parse response tags using regex
+    static const common_regex response_regex("<response>([\\s\\S]*?)</response>");
+    if (auto res = builder.try_find_regex(response_regex)) {
+        // Extract the content between the tags (capture group 1)
+        auto content = builder.str(res->groups[1]);
+        builder.add_content(content);
+        builder.move_to(res->groups[0].end);
+    }
+
+    if (!builder.syntax().parse_tool_calls) {
+        builder.add_content(builder.consume_rest());
+        return;
+    }
+
+    // Look for tool calls
+    static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
+    if (auto res = builder.try_find_regex(tool_call_regex)) {
+        builder.move_to(res->groups[0].end);
+
+        // Expect JSON array of tool calls
+        auto tool_calls_data = builder.consume_json();
+        if (tool_calls_data.json.is_array()) {
+            if (!builder.add_tool_calls(tool_calls_data.json)) {
+                builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
+            }
+        } else {
+            builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
+        }
+    } else {
+        builder.add_content(builder.consume_rest());
+    }
+}
+
 static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
     common_chat_params data;
     data.prompt = apply(tmpl, inputs);
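For reference, the grammar built above constrains a tool-calling Granite turn to end with <|tool_call|> followed by a JSON array of {name, arguments} objects (prefixed, when thinking is forced open, by the closing </think> and a <response>...</response> section), and common_chat_parse_granite reads back exactly that shape. The standalone sketch below (not part of the commit) illustrates the payload handling with an invented tool; only the structure comes from this diff, and it uses the same nlohmann::json that chat.cpp already depends on.

#include <nlohmann/json.hpp>

#include <iostream>
#include <string>

int main() {
    using json = nlohmann::json;

    // A constrained generation ends with the marker and a JSON array, e.g.:
    //   ...<|tool_call|>[{"name": "get_weather", "arguments": {"city": "Boston"}}]
    // The tool name and arguments here are invented for illustration.
    const std::string payload =
        R"([{"name": "get_weather", "arguments": {"city": "Boston"}}])";

    // The parser expects an array of {name, arguments} objects; anything else
    // is re-emitted verbatim as content, prefixed with "<|tool_call|>".
    const json calls = json::parse(payload);
    for (const auto & call : calls) {
        std::cout << "tool: "  << call.at("name").get<std::string>()
                  << " args: " << call.at("arguments").dump() << "\n";
    }
    return 0;
}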
@@ -1769,6 +1888,11 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_command_r7b(tmpl, params);
     }
 
+    // Granite (IBM) - detects thinking support
+    if (src.find("elif thinking") != std::string::npos && src.find("<think>") != std::string::npos) {
+        return common_chat_params_init_granite(tmpl, params);
+    }
+
     // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
     if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
         return common_chat_params_init_hermes_2_pro(tmpl, params);
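Template detection here is purely substring-based: a template is treated as Granite when its source contains both "elif thinking" and "<think>". A small self-contained sketch of that check against a hypothetical, heavily abridged template fragment (the fragment is illustrative, not taken from an actual Granite template):

#include <iostream>
#include <string>

int main() {
    // Hypothetical, abridged Jinja fragment of the kind the check matches;
    // real Granite templates are far longer.
    const std::string src = R"({%- if tools %}
{{- "<|tool_call|>" }}
{%- elif thinking %}
{{- "<think>" }}
{%- endif %})";

    // Same detection logic as in common_chat_templates_apply_jinja() above.
    const bool is_granite =
        src.find("elif thinking") != std::string::npos &&
        src.find("<think>")       != std::string::npos;

    std::cout << (is_granite ? "Granite template" : "other template") << "\n";
    return 0;
}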
@@ -1925,6 +2049,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
         case COMMON_CHAT_FORMAT_COMMAND_R7B:
             common_chat_parse_command_r7b(builder);
             break;
+        case COMMON_CHAT_FORMAT_GRANITE:
+            common_chat_parse_granite(builder);
+            break;
         default:
             throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
     }

common/chat.h

Lines changed: 1 addition & 0 deletions
@@ -109,6 +109,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
     COMMON_CHAT_FORMAT_HERMES_2_PRO,
     COMMON_CHAT_FORMAT_COMMAND_R7B,
+    COMMON_CHAT_FORMAT_GRANITE,
 
     COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
 };
