
Commit 14a6ab1

Merge branch 'master' into xsn/mtmd_qwen2vl
2 parents: b303584 + 4e87962

File tree

4 files changed: +11 −28 lines

examples/llava/mtmd.cpp

Lines changed: 1 addition & 9 deletions

@@ -213,9 +213,6 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
     }
 
     // llava-1.5, llava-1.6, Yi-VL, Yi-34B, granite: don't need to add prefix and suffix
-    // for glm-edge, we don't need to add because the tokens are already in the returned embeddings
-
-    // TODO @ngxson : glm-edge : remove BOI / EOI tokens embeddings, decode them as normal tokens
 
     std::vector<std::string> parts = string_split_str(prompt_modified, ctx->image_marker);
     output.clear();
@@ -256,7 +253,7 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
     };
 
     for (const auto & part : parts) {
-        //printf("tokenizing part: %s\n", part.c_str());
+        // printf("tokenizing part: %s\n", part.c_str());
         bool add_bos = &parts.front() == &part;
         auto tokens = mtmd_tokenize_text_internal(vocab, part, text.add_special && add_bos, text.parse_special);
         if (tokens.empty()) {
@@ -356,11 +353,6 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
     LOG_DBG("image_tokens->ny = %d\n", image_tokens->ny);
     LOG_DBG("batch_f32 size = %d\n", (int)image_tokens->batch_f32.entries.size());
 
-    if (clip_is_glm(ctx->ctx_clip)) {
-        // glm-edge
-        image_tokens->nx += 2; // add 2 for the begin_of_image and end_of_image token embeddings
-    }
-
     mtmd_input_chunk chunk{
         MTMD_INPUT_CHUNK_TYPE_IMAGE,
         {},
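For context on the hunks above: this merge drops the glm-edge special case that widened each image chunk by two positions for the begin_of_image/end_of_image token embeddings; per the removed TODO, those tokens are meant to be handled as ordinary text tokens instead, so an image chunk now contributes exactly nx*ny positions. The sketch below illustrates only the marker-splitting step visible in the first hunk; split_on_marker is a hypothetical stand-in for llama.cpp's string_split_str, and the marker literal is an assumption for illustration.

// marker_split_sketch.cpp — minimal, self-contained sketch of the splitting
// step in mtmd_tokenize; not llama.cpp API.
#include <cstdio>
#include <string>
#include <vector>

// Split `input` on every occurrence of `delim`, keeping empty parts so that
// back-to-back markers still produce one image slot each.
static std::vector<std::string> split_on_marker(const std::string & input, const std::string & delim) {
    std::vector<std::string> parts;
    size_t start = 0;
    size_t pos;
    while ((pos = input.find(delim, start)) != std::string::npos) {
        parts.push_back(input.substr(start, pos - start));
        start = pos + delim.size();
    }
    parts.push_back(input.substr(start));
    return parts;
}

int main() {
    const std::string marker = "<__image__>"; // assumed marker value; the real one comes from mtmd_context
    const auto parts = split_on_marker("describe " + marker + " briefly", marker);
    // mtmd_tokenize then alternates: each text part becomes a text chunk, and
    // each gap between parts becomes an image chunk of embedding positions.
    for (const auto & part : parts) {
        printf("text part: '%s'\n", part.c_str());
    }
    return 0;
}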

src/llama-chat.cpp

Lines changed: 1 addition & 9 deletions

@@ -447,7 +447,7 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "<|assistant|>";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
+    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4 || tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
         ss << "[gMASK]" << "<sop>";
         for (auto message : chat) {
             std::string role(message->role);
@@ -456,14 +456,6 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "<|assistant|>";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
-        for (auto message : chat) {
-            std::string role(message->role);
-            ss << "<|" << role << "|>" << "\n" << message->content;
-        }
-        if (add_ass) {
-            ss << "<|assistant|>";
-        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
         // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
         for (auto message : chat) {
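With this merge, GLMEdge is routed through the GLM4 branch, so it picks up the [gMASK]<sop> prefix the removed branch lacked. Below is a minimal, self-contained sketch of what the merged branch renders, assuming its body emits `<|role|>` plus a newline followed by the message content for each message, as the removed GLMEdge branch did; chat_msg and render_glm4 are illustrative stand-ins, not llama.cpp API.

// render_sketch.cpp — illustrative stand-in for the merged CHATGLM_4/GLMEDGE branch.
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

struct chat_msg {
    std::string role;
    std::string content;
};

// Mirrors the merged branch: a [gMASK]<sop> prefix, one <|role|> header plus a
// newline per message, and a trailing <|assistant|> header when add_ass is set.
static std::string render_glm4(const std::vector<chat_msg> & chat, bool add_ass) {
    std::ostringstream ss;
    ss << "[gMASK]" << "<sop>";
    for (const auto & message : chat) {
        ss << "<|" << message.role << "|>" << "\n" << message.content;
    }
    if (add_ass) {
        ss << "<|assistant|>";
    }
    return ss.str();
}

int main() {
    const std::vector<chat_msg> chat = {
        { "system", "You are a helpful assistant" },
        { "user",   "Hello" },
    };
    // Prints: [gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>
    printf("%s\n", render_glm4(chat, /*add_ass=*/true).c_str());
    return 0;
}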

src/llama-context.cpp

Lines changed: 0 additions & 2 deletions

@@ -1536,8 +1536,6 @@ int32_t llama_context::output_reserve(int32_t n_outputs) {
     // set all ids as invalid (negative)
     std::fill(output_ids.begin(), output_ids.end(), -1);
 
-    ggml_backend_buffer_clear(buf_output.get(), 0);
-
     this->n_outputs = 0;
     this->n_outputs_max = n_outputs_max;
 

tests/test-chat-template.cpp

Lines changed: 9 additions & 8 deletions

@@ -187,14 +187,15 @@ int main(void) {
         /* .bos_token= */ "",
         /* .eos_token= */ "",
     },
-    {
-        /* .name= */ "GLMEdge",
-        /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
-        /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
-        /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
-        /* .bos_token= */ "",
-        /* .eos_token= */ "",
-    },
+    // TODO @ngxson : GLMEdge produces poor result without `[gMASK]<sop>`, so we're temporarily using GLM4 template for it. We should fix this in the future.
+    // {
+    //     /* .name= */ "GLMEdge",
+    //     /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>",
+    //     /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+    //     /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>",
+    //     /* .bos_token= */ "",
+    //     /* .eos_token= */ "",
+    // },
     {
         /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF",
         /* .template_str= */ U8C("{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}"),
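Under the merged branch in src/llama-chat.cpp, the conversation used by the commented-out GLMEdge case would presumably render as the old expectation with a [gMASK]<sop> prefix, i.e.:

[gMASK]<sop><|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>

This is an illustration derived from the diffs above; the test file's actual GLM4 fixture is not shown in this commit.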
