
Commit 7a7d6f6

Author: ochafik
Fix merge

1 parent e7ff6ec commit 7a7d6f6

File tree

9 files changed: +14 -23 lines

common/common.cpp

Lines changed: 3 additions & 2 deletions
@@ -1929,8 +1929,9 @@ minja::chat_template llama_chat_template_from_model(
             chat_template = _llama_model_meta_val_str(model, "tokenizer.chat_template");
         }
     }
-    auto bos_token = _common_token_to_piece(model, llama_token_bos(model), true);
-    auto eos_token = _common_token_to_piece(model, llama_token_eos(model), true);
+    const auto vocab = llama_model_get_vocab(model);
+    auto bos_token = common_token_to_piece(vocab, llama_vocab_bos(vocab), true);
+    auto eos_token = common_token_to_piece(vocab, llama_vocab_eos(vocab), true);
     return {std::move(chat_template), bos_token, eos_token};
 }
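This hunk follows llama.cpp's refactor that moved token queries from the model object onto a vocab handle. Below is a minimal sketch of the resulting pattern, assuming the post-refactor API used in this branch; the helper name get_bos_eos_pieces is hypothetical, not code from the commit.

#include "common.h"
#include "llama.h"

#include <string>
#include <utility>

// Hypothetical helper illustrating the vocab-based lookup pattern above:
// fetch the vocab once from the model, then route all token queries
// through llama_vocab_* instead of the old llama_token_*(model) calls.
static std::pair<std::string, std::string> get_bos_eos_pieces(const llama_model * model) {
    const llama_vocab * vocab = llama_model_get_vocab(model);
    std::string bos = common_token_to_piece(vocab, llama_vocab_bos(vocab), /* special = */ true);
    std::string eos = common_token_to_piece(vocab, llama_vocab_eos(vocab), /* special = */ true);
    return {bos, eos};
}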

common/sampling.h

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx,
 char        common_sampler_type_to_chr(enum common_sampler_type cnstr);
 std::string common_sampler_type_to_str(enum common_sampler_type cnstr);

-bool common_sampler_trigger_grammar(const struct llama_model * model, common_sampler * gsmpl, const std::string & trigger);
+bool common_sampler_trigger_grammar(const struct llama_vocab * vocab, common_sampler * gsmpl, const std::string & trigger);

 std::vector<enum common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
 std::vector<enum common_sampler_type> common_sampler_types_from_chars(const std::string & chars);
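With the new declaration, callers pass the vocab rather than the whole model. A hypothetical call site, sketched under that assumption (maybe_trigger_grammar is not from this commit):

#include "common.h"
#include "sampling.h"
#include "llama.h"

#include <string>

// Hypothetical caller: derives the vocab from the model and forwards it to the
// updated common_sampler_trigger_grammar(), which previously took the model itself.
static bool maybe_trigger_grammar(const llama_model * model, common_sampler * gsmpl, const std::string & piece) {
    return common_sampler_trigger_grammar(llama_model_get_vocab(model), gsmpl, piece);
}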

examples/server/server.cpp

Lines changed: 3 additions & 3 deletions
@@ -3729,7 +3729,7 @@ int main(int argc, char ** argv) {
     const auto handle_props = [&ctx_server, &res_ok, &get_chat_templates](const httplib::Request &, httplib::Response & res) {
         // this endpoint is publicly available, please only return what is safe to be exposed
         const auto & templates = get_chat_templates();
-        const auto vocab = llama_vocab_from_model(ctx_server.model);
+        const auto vocab = llama_model_get_vocab(ctx_server.model);
         json data = {
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots",                 ctx_server.params_base.n_parallel },
@@ -3765,7 +3765,6 @@
             json & data,
             httplib::Response & res,
             oaicompat_type oaicompat,
-            bool oaicompat_chat = false,
             llama_tool_call_style tool_call_style = llama_tool_call_style::None) {
         GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL);

@@ -3976,7 +3975,8 @@
             SERVER_TASK_TYPE_COMPLETION,
             data,
             res,
-            OAICOMPAT_TYPE_CHAT);
+            OAICOMPAT_TYPE_CHAT,
+            tool_call_style);
     };

     const auto handle_models = [&params, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
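Two things happen here: the oaicompat_chat flag is dropped because the oaicompat_type enum already distinguishes chat from plain completions, and tool_call_style is threaded through so the chat endpoint can format tool calls. A rough sketch of why the boolean was redundant; the enum values are assumed from examples/server and response_object_kind is a hypothetical helper, not commit code:

// Sketch: the enum alone selects the response shape, so a separate
// "is this a chat request" boolean carried no extra information.
enum oaicompat_type { OAICOMPAT_TYPE_NONE, OAICOMPAT_TYPE_CHAT, OAICOMPAT_TYPE_COMPLETION };

static const char * response_object_kind(oaicompat_type oaicompat) {
    switch (oaicompat) {
        case OAICOMPAT_TYPE_CHAT:       return "chat.completion"; // OpenAI-style chat response
        case OAICOMPAT_TYPE_COMPLETION: return "text_completion"; // OpenAI-style completion
        default:                        return "none";            // server-native JSON
    }
}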

examples/server/tests/unit/test_chat_completion.py

Lines changed: 3 additions & 3 deletions
@@ -241,7 +241,7 @@ def test_chat_completion_with_timings_per_token():
 ])
 def test_completion_with_required_tool(template_name: str, n_predict: int, tool: dict, expected_arguments: dict):
     global server
-    server.use_jinja = True
+    server.jinja = True
     server.n_predict = n_predict
     server.chat_template_file = f'../../../tests/chat/templates/{template_name}.jinja'
     server.start()
@@ -278,7 +278,7 @@ def test_completion_with_required_tool(template_name: str, n_predict: int, tool:
 ])
 def test_completion_without_tool_call(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None):
     global server
-    server.use_jinja = True
+    server.jinja = True
     server.n_predict = n_predict
     server.chat_template_file = f'../../../tests/chat/templates/{template_name}.jinja'
     server.start()
@@ -322,7 +322,7 @@ def test_completion_without_tool_call(template_name: str, n_predict: int, tools:
 ])
 def test_hello_world_tool_call(tool: dict, expected_arguments: dict, hf_repo: str, hf_file: str, template_override: Tuple[str, str | None] | None):
     global server
-    server.use_jinja = True
+    server.jinja = True
     server.n_ctx = 8192
     server.n_predict = 128
     server.model_hf_repo = hf_repo

examples/server/tests/utils.py

Lines changed: 0 additions & 4 deletions
@@ -157,10 +157,6 @@ def start(self, timeout_seconds: int = 10) -> None:
         if self.lora_files:
             for lora_file in self.lora_files:
                 server_args.extend(["--lora", lora_file])
-        if self.chat_template_file:
-            server_args.extend(["--chat-template-file", self.chat_template_file])
-        if self.use_jinja:
-            server_args.append("--jinja")
         if self.disable_ctx_shift:
             server_args.extend(["--no-context-shift"])
         if self.api_key:

examples/server/utils.hpp

Lines changed: 1 addition & 1 deletion
@@ -595,7 +595,7 @@ static json oaicompat_completion_params_parse(
     if (has_tools) {
         if (stream) {
             throw std::runtime_error("Cannot use tools with stream");
-            }
+        }
         if (use_jinja) {
             if (tool_call_style == llama_tool_call_style::UnknownToolCallStyle) {
                 throw std::runtime_error("Chat template does not seem to support tools. Override the model template with --chat-template.");

include/llama.h

Lines changed: 2 additions & 2 deletions
@@ -1193,8 +1193,6 @@ extern "C" {
             const char * grammar_str,
             const char * grammar_root);

-    LLAMA_API bool llama_sampler_is_grammar_empty(struct llama_sampler * gsmpl);
-
     /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
     LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
             int32_t penalty_last_n,   // last n tokens to penalize (0 = disable penalty, -1 = context size)
@@ -1256,6 +1254,8 @@ extern "C" {
     // Returns the sampled token
     LLAMA_API llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_context * ctx, int32_t idx);

+    LLAMA_API bool llama_sampler_is_grammar_empty(struct llama_sampler * smpl);
+
     // TODO: extend in the future
     //LLAMA_API void llama_decode_with_sampler(struct llama_context * ctx, struct llama_sampler * smpl, struct llama_batch batch, ...);
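The declaration is relocated next to llama_sampler_sample and its parameter renamed from gsmpl to smpl for consistency. A usage sketch, assuming the trigger-grammar workflow this branch adds; grammar_is_active is a hypothetical helper:

#include "llama.h"

// Hypothetical helper: in the trigger-grammar workflow, the grammar sampler
// starts out empty and only becomes non-empty once a trigger word is seen,
// so "empty" doubles as "constrained decoding not yet active".
static bool grammar_is_active(llama_sampler * smpl) {
    return !llama_sampler_is_grammar_empty(smpl);
}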

src/llama-sampling.cpp

Lines changed: 0 additions & 5 deletions
@@ -1511,11 +1511,6 @@ static struct llama_sampler_i llama_sampler_grammar_i = {
     /* .free   = */ llama_sampler_grammar_free,
 };

-bool llama_sampler_is_grammar_empty(struct llama_sampler * gsmpl) {
-    struct llama_sampler_grammar * ctx = (struct llama_sampler_grammar *) gsmpl->ctx;
-    return ctx->grammar == nullptr;
-}
-
 struct llama_sampler * llama_sampler_init_grammar(const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root) {
     auto * ctx = new llama_sampler_grammar;
src/llama.cpp

Lines changed: 1 addition & 2 deletions
@@ -1130,8 +1130,7 @@ struct llm_build_context {
         rope_type        (hparams.rope_type),
         cb               (cb),
         buf_compute_meta (lctx.buf_compute_meta) {
-        // all
-ializations should be done in init()
+        // all initializations should be done in init()
     }

     void init() {
