
Commit 73f435f

Simplified logic and UI

* removed the "custom" template
* fixed reading the template, prefix, and suffix from the payload
* removed `chat_template` from the UI
1 parent a0e27c1 commit 73f435f
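
The upshot for API clients: the template, prefix, and suffix can now be read from the request payload itself (see the `oaicompat_completion_params_parse` hunk in `examples/server/utils.hpp` below). A minimal sketch of such a request body, built with nlohmann::json, the same JSON library the server uses; the field values here are illustrative only, not from this commit:

```cpp
#include <nlohmann/json.hpp>
#include <iostream>

int main() {
    nlohmann::json body;
    body["messages"] = nlohmann::json::array({
        { {"role", "user"}, {"content", "Hello"} }
    });
    // used only when no chat template is chosen; values are illustrative
    body["input_prefix"] = "<|im_start|>user\n";
    body["input_suffix"] = "<|im_end|>\n<|im_start|>assistant\n";
    // if present, this field wins and prefix/suffix are ignored:
    // body["chat_template"] = "chatml";
    std::cout << body.dump(2) << std::endl;
}
```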

File tree

3 files changed: +24, -53 lines

examples/server/public/index.html
Lines changed: 0 additions & 38 deletions

```diff
@@ -119,36 +119,6 @@ <h2 class="font-bold mb-4 ml-4">Conversations</h2>
           </li>
         </ul>
       </div>
-
-      <!-- Templates -->
-      <div class="dropdown dropdown-end dropdown-bottom">
-        <div tabindex="0" role="button" class="btn m-1">
-          Templates
-          <svg width="12px" height="12px" class="inline-block h-2 w-2 fill-current opacity-60" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 2048 2048">
-            <path d="M1799 349l242 241-1017 1017L7 590l242-241 775 775 775-775z"></path>
-          </svg>
-        </div>
-        <ul tabindex="0" class="dropdown-content bg-base-300 rounded-box z-[1] w-52 p-2 shadow-2xl h-80 overflow-y-auto">
-          <li>
-            <button
-              class="btn btn-sm btn-block w-full btn-ghost justify-start"
-              :class="{ 'btn-active': config.chat_template === 'chatml' }"
-              @click="config.chat_template = 'chatml'">
-              auto
-            </button>
-          </li>
-          <li v-for="tmpl in templates">
-            <input
-              type="radio"
-              name="tmpl-dropdown"
-              class="theme-controller btn btn-sm btn-block w-full btn-ghost justify-start"
-              :aria-label="tmpl"
-              :value="tmpl"
-              :checked="config.chat_template === tmpl"
-              @click="setSelectedTemplate(tmpl)" />
-          </li>
-        </ul>
-      </div>
     </div>
   </div>

@@ -319,7 +289,6 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
   // Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value.
   apiKey: '',
   systemMessage: 'You are a helpful assistant.',
-  chat_template: 'chatml',
   input_prefix: '',
   input_suffix: '',
   // make sure these default values are in sync with `common.h`

@@ -347,7 +316,6 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
 const CONFIG_INFO = {
   apiKey: 'Set the API Key if you are using --api-key option for the server.',
   systemMessage: 'The starting message that defines how model should behave.',
-  chat_template: 'The fromat used for messages.',
   input_prefix: 'Prefix for user messages in custom chat templates.',
   input_suffix: 'Suffix for user messages in custom chat templates.',
   samplers: 'The order at which samplers are applied, in simplified way. Default is "dkypmxt": dry->top_k->typ_p->top_p->min_p->xtc->temperature',

@@ -375,7 +343,6 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
 const CONFIG_NUMERIC_KEYS = Object.entries(CONFIG_DEFAULT).filter(e => isNumeric(e[1])).map(e => e[0]);
 // list of themes supported by daisyui
 const THEMES = ['light', 'dark', 'cupcake', 'bumblebee', 'emerald', 'corporate', 'synthwave', 'retro', 'cyberpunk', 'valentine', 'halloween', 'garden', 'forest', 'aqua', 'lofi', 'pastel', 'fantasy', 'wireframe', 'black', 'luxury', 'dracula', 'cmyk', 'autumn', 'business', 'acid', 'lemonade', 'night', 'coffee', 'winter', 'dim', 'nord', 'sunset'];
-const CHAT_TEMPLATES = ['chatml', 'llama2', 'mistral', 'phi3', 'zephyr', 'monarch', 'gemma', 'gemma2', 'orion', 'openchat', 'vicuna', 'vicuna-orca', 'deepseek', 'command-r', 'llama3', 'chatglm3', 'chatglm4', 'minicpm', 'deepseek2', 'exaone3', 'rwkv-world', 'granite', 'custom'];
 
 // markdown support
 const VueMarkdown = defineComponent(

@@ -522,7 +489,6 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
       editingMsg: null,
       // const
       themes: THEMES,
-      templates: CHAT_TEMPLATES,
       configDefault: {...CONFIG_DEFAULT},
       configInfo: {...CONFIG_INFO},
     }

@@ -542,9 +508,6 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
       this.selectedTheme = theme;
       StorageUtils.setTheme(theme);
     },
-    setSelectedTemplate(template) {
-      this.config.chat_template = template;
-    },
     newConversation() {
       if (this.isGenerating) return;
       this.viewingConvId = StorageUtils.getNewConvId();

@@ -604,7 +567,6 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
         stream: true,
         cache_prompt: true,
         samplers: this.config.samplers,
-        chat_template: this.config.chat_template,
         input_prefix: this.config.input_prefix,
         input_suffix: this.config.input_suffix,
         temperature: this.config.temperature,
```

examples/server/server.cpp
Lines changed: 5 additions & 11 deletions

```diff
@@ -798,6 +798,7 @@ struct server_context {
         slot.oaicompat = false;
         slot.oaicompat_model = "";
     }
+    std::string default_empty = "";
 
     slot.params.stream = json_value(data, "stream", false);
     slot.params.cache_prompt = json_value(data, "cache_prompt", false);

@@ -1147,9 +1148,6 @@ struct server_context {
         {"model", params.model_alias},
         {"seed", slot.sparams.seed},
         {"seed_cur", slot.smpl ? common_sampler_get_seed(slot.smpl) : 0},
-        {"chat_template", params.chat_template},
-        {"input_prefix", params.input_prefix},
-        {"input_suffix", params.input_suffix},
         {"temperature", slot.sparams.temp},
         {"dynatemp_range", slot.sparams.dynatemp_range},
         {"dynatemp_exponent", slot.sparams.dynatemp_exponent},

@@ -3221,22 +3219,18 @@ int main(int argc, char ** argv) {
 
     LOG_INF("%s: model loaded\n", __func__);
 
-    // if a standard chat template is not chosen, check prefix and suffix to switch to custom template
+    // if a standard chat template is not chosen, check prefix and suffix to switch to custom formatting
     // otherwise use the one that comes with the model (if any)
     // if a standard chat template is chosen, warn about prefix and suffix not being used
     if (params.chat_template.empty()) {
         if (!params.input_prefix.empty() || !params.input_suffix.empty()) {
-            LOG_WRN("%s: Prefix and suffix are used instead of a chat template. This may cause the model to output suboptimal responses\n", __func__);
-            params.chat_template = "custom";
+            LOG_WRN("%s: Prefix and suffix will be used for a custom chat template. This may cause the model to output suboptimal responses\n", __func__);
         } else if (!ctx_server.validate_model_chat_template()) {
             LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
             params.chat_template = "chatml";
         }
-    } else if (params.chat_template != "custom" &&
-               (!params.input_prefix.empty() || !params.input_suffix.empty())) {
-        LOG_WRN("%s: Prefix and suffix are defined, but will not be used because a standard chat template is chosen.\n", __func__);
-    } else {
-        LOG_WRN("%s: Custom chat template is chosen. This may cause the model to output suboptimal responses\n", __func__);
+    } else if (!params.input_prefix.empty() || !params.input_suffix.empty()) {
+        LOG_WRN("%s: Prefix and suffix are defined, but will not be used because a chat template '%s' is chosen.\n", __func__, params.chat_template.c_str());
     }
 
     // print sample chat example to make it clear which template is used
```

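The startup logic above now reduces to a three-way decision. A minimal standalone sketch of that decision table; the `Params` struct and `formatting_mode` helper are illustrative names for this sketch, not llama.cpp's API:

```cpp
#include <cstdio>
#include <string>

struct Params { std::string chat_template, input_prefix, input_suffix; };

// returns a short description of which formatting path the server takes
static const char * formatting_mode(const Params & p, bool model_tmpl_ok) {
    const bool has_fix = !p.input_prefix.empty() || !p.input_suffix.empty();
    if (p.chat_template.empty()) {
        if (has_fix)        return "custom prefix/suffix formatting";
        if (!model_tmpl_ok) return "fallback to chatml";
        return "model's built-in template";
    }
    // a named template always wins; prefix/suffix (if any) are ignored
    return "named template";
}

int main() {
    printf("%s\n", formatting_mode({"", "<|user|>", "<|end|>"}, true));
    // prints: custom prefix/suffix formatting
}
```
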
examples/server/utils.hpp
Lines changed: 19 additions & 4 deletions

```diff
@@ -304,6 +304,8 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
     std::vector<common_chat_msg> chat;
     std::string formatted_chat;
 
+    bool is_custom = !prefix.empty() || !suffix.empty();
+
     for (size_t i = 0; i < messages.size(); ++i) {
         const auto & curr_msg = messages[i];
 

@@ -326,7 +328,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
             throw std::runtime_error("Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)");
         }
 
-        if (tmpl == "custom") {
+        if (is_custom) {
             // simple format using prefix and suffix
             if (role == "user") formatted_chat += prefix + content + suffix;
             else formatted_chat += content;

@@ -335,7 +337,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
         }
     }
 
-    if (tmpl != "custom") formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
+    if (!is_custom) formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
     LOG_WRN("formatted_chat using '%s': '%s'\n", tmpl.c_str(), formatted_chat.c_str());
 
     return formatted_chat;
```
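
The custom path in `format_chat` is plain string concatenation around user turns. A self-contained sketch of that behavior; `Msg` is an assumed stand-in for `common_chat_msg`, and the prefix/suffix values are illustrative only:

```cpp
#include <iostream>
#include <string>
#include <vector>

struct Msg { std::string role, content; };

static std::string format_custom(const std::vector<Msg> & msgs,
                                 const std::string & prefix,
                                 const std::string & suffix) {
    std::string out;
    for (const auto & m : msgs) {
        if (m.role == "user") out += prefix + m.content + suffix; // wrap user turns
        else                  out += m.content;                   // pass others through
    }
    return out;
}

int main() {
    const std::vector<Msg> msgs = {
        {"system", "You are a helpful assistant."},
        {"user",   "Hi there"},
    };
    std::cout << format_custom(msgs, "### User:\n", "\n### Assistant:\n");
}
```
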
```diff
@@ -351,7 +353,7 @@ inline std::string format_chat_example(const struct llama_model * model, const s
 
     std::string formatted_example;
 
-    if (tmpl == "custom") {
+    if (!prefix.empty() || !suffix.empty()) {
         for (auto message : msgs) {
             if (message.role == "user") formatted_example += prefix + message.content + suffix;
             else formatted_example += message.content;
```
```diff
@@ -634,7 +636,20 @@ static json oaicompat_completion_params_parse(
     llama_params["__oaicompat"] = true;
 
     // Apply chat template to the list of messages
-    llama_params["prompt"] = format_chat(model, chat_template, input_prefix, input_suffix, body.at("messages"));
+    std::string chat_tmpl = chat_template;
+    std::string prefix = "";
+    std::string suffix = "";
+
+    // if template is sent in data, ignore prefix and suffix
+    if (body.contains("chat_template")) {
+        chat_tmpl = body.at("chat_template").get<std::string>();
+        LOG_WRN("\nUsing '%s' template, prefix and suffix are ignored.\n", chat_tmpl.c_str());
+    } else {
+        prefix = (body.contains("input_prefix") ? body.at("input_prefix").get<std::string>() : input_prefix);
+        suffix = (body.contains("input_suffix") ? body.at("input_suffix").get<std::string>() : input_suffix);
+    }
+
+    llama_params["prompt"] = format_chat(model, chat_tmpl, prefix, suffix, body.at("messages"));
 
     // Handle "stop" field
     if (body.contains("stop") && body.at("stop").is_string()) {
```

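The precedence rule in `oaicompat_completion_params_parse` is: a `chat_template` field in the request wins outright, and otherwise per-request `input_prefix`/`input_suffix` override the server's CLI defaults. A sketch of that rule extracted into a standalone helper; `ChatFormat` and `resolve_format` are illustrative names, not part of the commit:

```cpp
#include <nlohmann/json.hpp>
#include <string>

struct ChatFormat { std::string tmpl, prefix, suffix; };

static ChatFormat resolve_format(const nlohmann::json & body,
                                 const std::string & cli_tmpl,
                                 const std::string & cli_prefix,
                                 const std::string & cli_suffix) {
    ChatFormat f{cli_tmpl, "", ""};
    if (body.contains("chat_template")) {
        // an explicit template in the payload wins; prefix/suffix stay empty
        f.tmpl = body.at("chat_template").get<std::string>();
    } else {
        // otherwise per-request prefix/suffix override the CLI defaults
        f.prefix = body.value("input_prefix", cli_prefix);
        f.suffix = body.value("input_suffix", cli_suffix);
    }
    return f;
}

int main() {
    nlohmann::json body = {{"input_prefix", "U: "}};
    ChatFormat f = resolve_format(body, "", "", "");
    return f.prefix == "U: " ? 0 : 1; // per-request prefix took effect
}
```
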