Skip to content

Commit a5c3d4b

Browse files
committed
common/llama: realign structure members to reduce CPU cacheline usage on 64-bit platforms
- llm_graph_context from 256 to 248 bytes
- llm_graph_params from 104 to 96 bytes
- llama_sampler_chain from 48 to 40 bytes
- llama_model_loader from 328 to 320 bytes (saved 1 cacheline)
- llama_model_params from 72 to 64 bytes (saved 1 cacheline)
- common_log_entry from 48 to 40 bytes
- templates_params from 112 to 96 bytes (saved 16 bytes)
- common_chat_params from 152 to 144 bytes
- common_chat_templates_inputs from 136 to 128 bytes (saved 1 cacheline)
- common_params from 4960 to 4888 bytes (saved 1 cacheline)
- common_params_sampling from 288 to 280 bytes
- common_grammar_trigger from 48 to 40 bytes
- cpu_params from 532 to 528 bytes
1 parent 2aa777d commit a5c3d4b

File tree

13 files changed

+101
-95
lines changed

13 files changed

+101
-95
lines changed

common/chat.cpp

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ struct common_chat_templates {
2626
struct templates_params {
2727
json messages;
2828
json tools;
29-
common_chat_tool_choice tool_choice;
3029
json json_schema;
30+
common_chat_tool_choice tool_choice;
3131
bool parallel_tool_calls;
3232
bool stream;
33-
std::string grammar;
3433
bool add_generation_prompt = true;
3534
bool extract_reasoning = true;
35+
std::string grammar;
3636
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
3737
};
3838

@@ -815,7 +815,7 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
815815
}
816816
builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
817817
});
818-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
818+
data.grammar_triggers.push_back({"[TOOL_CALLS]", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
819819
data.preserved_tokens = {
820820
"[TOOL_CALLS]",
821821
};
@@ -862,8 +862,8 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
862862
builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
863863
});
864864
data.grammar_triggers.push_back({
865-
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
866865
"<|START_ACTION|>",
866+
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
867867
});
868868
data.preserved_tokens = {
869869
"<|START_ACTION|>",
@@ -1004,11 +1004,11 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te
10041004
});
10051005
// Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
10061006
data.grammar_triggers.push_back({
1007-
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
10081007
"\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"", // + name + "\"[\\s\\S]*",
1008+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
10091009
});
10101010
if (!builtin_tools.empty()) {
1011-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
1011+
data.grammar_triggers.push_back({"<|python_tag|>", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
10121012
data.preserved_tokens.push_back("<|python_tag|>");
10131013
}
10141014
// Allow a few empty lines on top of the usual constrained json schema space rule.
@@ -1085,10 +1085,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
10851085
"(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
10861086
"\"<|tool▁calls▁end|>\""
10871087
" space");
1088-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool▁calls▁begin|>"});
1089-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls_begin|>"});
1090-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool calls begin|>"});
1091-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool\\_calls\\_begin|>"});
1088+
data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
1089+
data.grammar_triggers.push_back({"<|tool_calls_begin|>", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
1090+
data.grammar_triggers.push_back({"<|tool calls begin|>", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
1091+
data.grammar_triggers.push_back({"<|tool\\_calls\\_begin|>", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
10921092
data.preserved_tokens = {
10931093
"<think>",
10941094
"</think>",
@@ -1196,7 +1196,7 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
11961196
}
11971197
builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
11981198
});
1199-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["});
1199+
data.grammar_triggers.push_back({" functools[", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
12001200
data.preserved_tokens = {
12011201
" functools[",
12021202
};
@@ -1230,20 +1230,20 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
12301230
first_tool_rules.push_back(builder.add_rule(name + "-call", "( \"assistant<|end_header_id|>\\n\" )? \"" + name + "\\n\" " + args_rule));
12311231
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
12321232
data.grammar_triggers.push_back({
1233-
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
12341233
regex_escape(name + "\n"),
1234+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
12351235
});
12361236
data.grammar_triggers.push_back({
1237-
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
12381237
regex_escape("assistant<|end_header_id|>\n" + name + "\n"),
1238+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
12391239
});
12401240
data.grammar_triggers.push_back({
1241-
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
12421241
regex_escape(">>>" + name + "\n"),
1242+
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
12431243
});
12441244
data.grammar_triggers.push_back({
1245-
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
12461245
">>>assistant<|end_header_id|>\n" + name,
1246+
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
12471247
});
12481248
});
12491249
data.preserved_tokens = {
@@ -1339,12 +1339,12 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
13391339
});
13401340
if (has_raw_python) {
13411341
tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
1342-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
1342+
data.grammar_triggers.push_back({"<|python_tag|>", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
13431343
data.preserved_tokens.push_back("<|python_tag|>");
13441344
}
13451345
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
13461346
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
1347-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
1347+
data.grammar_triggers.push_back({"<function=", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
13481348
});
13491349
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
13501350
} else {
@@ -1404,13 +1404,13 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
14041404
"\"</function>\" space"));
14051405

14061406
data.grammar_triggers.push_back({
1407-
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
14081407
"<function=" + name + ">",
1408+
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
14091409
});
14101410
auto escaped_name = regex_escape(name);
14111411
data.grammar_triggers.push_back({
1412-
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
14131412
"<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
1413+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
14141414
});
14151415
});
14161416
auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
@@ -1431,12 +1431,12 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
14311431
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
14321432
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
14331433
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
1434-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"});
1435-
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function"});
1434+
data.grammar_triggers.push_back({"<tool_call>", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
1435+
data.grammar_triggers.push_back({"<function", COMMON_GRAMMAR_TRIGGER_TYPE_WORD});
14361436
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
14371437
data.grammar_triggers.push_back({
1438-
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
14391438
"(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"",
1439+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
14401440
});
14411441
data.preserved_tokens = {
14421442
"<think>",

common/chat.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,18 @@ struct common_chat_templates_inputs {
6868
bool add_generation_prompt = true;
6969
bool use_jinja = true;
7070
// Parameters below only supported when use_jinja is true
71-
std::vector<common_chat_tool> tools;
72-
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
7371
bool parallel_tool_calls = false;
7472
bool extract_reasoning = true;
73+
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
74+
std::vector<common_chat_tool> tools;
7575
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
7676
};
7777

7878
struct common_chat_params {
7979
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
80+
bool grammar_lazy = false;
8081
std::string prompt;
8182
std::string grammar;
82-
bool grammar_lazy = false;
8383
std::vector<common_grammar_trigger> grammar_triggers;
8484
std::vector<std::string> preserved_tokens;
8585
std::vector<std::string> additional_stops;

0 commit comments

Comments
 (0)