
Commit e288693

readme : model : mtmd : lfm2 improvements (ggml-org#15476)
* Support untied embeddings
* Increase number of image tokens to 1024
* Add LFM2-VL to readme
* Actually use untied embeddings
1 parent a0f98dd commit e288693

5 files changed: 11 additions (+), 4 deletions (-)

README.md
Lines changed: 1 addition & 0 deletions

@@ -151,6 +151,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [Bunny](https://github.com/BAAI-DCAI/Bunny)
 - [x] [GLM-EDGE](https://huggingface.co/models?search=glm-edge)
 - [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d)
+- [x] [LFM2-VL](https://huggingface.co/collections/LiquidAI/lfm2-vl-68963bbc84a610f7638d5ffa)
 
 </details>

gguf-py/gguf/constants.py
Lines changed: 1 addition & 0 deletions

@@ -2590,6 +2590,7 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.ATTN_K,
         MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.OUTPUT,
     ],
     MODEL_ARCH.SMALLTHINKER: [
         MODEL_TENSOR.TOKEN_EMBD,

src/llama-arch.cpp
Lines changed: 1 addition & 0 deletions

@@ -2010,6 +2010,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
         }
     },
     {

src/llama-model.cpp
Lines changed: 7 additions & 3 deletions

@@ -5474,8 +5474,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_LFM2:
                 {
-                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
                     tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
+                    output   = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }

                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];
@@ -17787,8 +17792,7 @@ struct llm_build_lfm2 : public llm_graph_context {
         cb(cur, "model.embedding_norm", -1);
         res->t_embd = cur;

-        // lm_head is tied with embeddings
-        cur = build_lora_mm(model.tok_embd, cur);
+        cur = build_lora_mm(model.output, cur);
         cb(cur, "lm_head", -1);

         res->t_logits = cur;
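
Note on the load_tensors change above: the output head is now loaded as an optional tensor, and when the checkpoint ships no separate output.weight (i.e. the embeddings are tied), the token embedding matrix is reused as the head. The snippet below is a minimal, self-contained sketch of that selection logic only; it uses a plain std::map as a stand-in for the GGUF tensor lookup and is not the actual llama.cpp loader API.

// sketch: pick the lm_head weight depending on whether "output.weight" exists
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
    // stand-in for the tensors present in a GGUF file (real code uses ggml tensors)
    std::map<std::string, std::vector<float>> tensors = {
        {"token_embd.weight", std::vector<float>(16, 0.1f)},
        // {"output.weight", ...}  // present only for untied checkpoints
    };

    const bool untied = tensors.count("output.weight") > 0;

    // optional lookup, analogous to TENSOR_NOT_REQUIRED in load_tensors()
    const std::vector<float> & lm_head = untied
        ? tensors.at("output.weight")        // untied: dedicated output projection
        : tensors.at("token_embd.weight");   // tied: fall back to the embedding matrix

    std::printf("lm_head has %zu weights, source: %s\n",
                lm_head.size(), untied ? "output.weight" : "token_embd.weight (tied)");
    return 0;
}

Run as-is this prints the tied case; adding an output.weight entry switches it to the untied path, which mirrors how the graph builder now always projects through model.output regardless of which tensor it points at.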

tools/mtmd/clip.cpp
Lines changed: 1 addition & 1 deletion

@@ -3513,7 +3513,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
     const int height = img->ny;
     const int total_factor = params.patch_size * params.proj_scale_factor;
     constexpr int min_image_tokens = 64;
-    constexpr int max_image_tokens = 256;
+    constexpr int max_image_tokens = 1024;
     const float min_pixels = min_image_tokens * total_factor * total_factor;
     const float max_pixels = max_image_tokens * total_factor * total_factor;
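
Note on the clip.cpp change above: the preprocessor caps images by a pixel budget derived from the token limits, so raising max_image_tokens from 256 to 1024 quadruples the largest image accepted before downscaling. Below is a small self-contained sketch of that arithmetic, with assumed example values for patch_size and proj_scale_factor (the real values come from the model's projector hyperparameters).

// sketch of the pixel budget computed in clip_image_preprocess
#include <cstdio>

int main() {
    const int patch_size        = 16;  // assumed example value
    const int proj_scale_factor = 2;   // assumed example value
    const int total_factor      = patch_size * proj_scale_factor;

    const int min_image_tokens = 64;
    const int max_image_tokens = 1024; // was 256 before this commit

    const float min_pixels = 1.0f * min_image_tokens * total_factor * total_factor;
    const float max_pixels = 1.0f * max_image_tokens * total_factor * total_factor;

    // with these assumptions: 64*32*32 = 65,536 and 1024*32*32 = 1,048,576 pixels
    std::printf("min_pixels = %.0f, max_pixels = %.0f\n", min_pixels, max_pixels);
    return 0;
}

Under the same assumed factors, the previous cap of 256 tokens corresponded to roughly 262,144 pixels, so the new limit admits about four times as many pixels before the image is scaled down.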
35193519
