
Commit 4c02664

support Qwen2.5-VL; avoid in-place operations.

1 parent: f1cf813

17 files changed, +761 -202 lines

README.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -31,6 +31,7 @@
 
 **What's New:**
 
+* 2025-09-23: Qwen2.5-VL
 * 2025-09-15: Ling/Ring-mini-2.0
 * 2025-09-08: GroveMoE
 * 2025-09-03: Apertus
```

convert.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -4486,7 +4486,9 @@ def state_dict_pp(cls, config, state_dict):
             if name == 'visual.patch_embed.proj.weight':
                 shape = tensor.shape
                 assert len(shape) == 5
-                r[name] = tensor.view(shape[0], shape[1] * shape[2] * shape[3] * shape[4])
+                assert shape[2] == 2
+                r[name.replace('proj.weight', 'proj.0.weight')] = tensor[:, :, 0, :, :]
+                r[name.replace('proj.weight', 'proj.1.weight')] = tensor[:, :, 1, :, :]
             elif name.endswith('.attn.qkv.bias') or name.endswith('.attn.qkv.weight'):
                 #print(f'shape: {name} = {tensor.shape}')
                 num_heads = config.vision_config['hidden_size']
@@ -4543,7 +4545,8 @@ def get_weight_names(config):
             "visual.merger.mlp.0.weight",
             "visual.merger.mlp.2.bias",
             "visual.merger.mlp.2.weight",
-            "visual.patch_embed.proj.weight",
+            "visual.patch_embed.proj.0.weight",
+            "visual.patch_embed.proj.1.weight",
         ]
 
         return weight_names
```
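Note: the diff above stops flattening the 5-D `visual.patch_embed.proj.weight` (a Conv3d kernel with temporal patch size 2) and instead splits it along the temporal axis into two 4-D projection weights. Below is a minimal sketch of that split, assuming the usual (out_channels, in_channels, 2, patch_h, patch_w) layout; `split_patch_embed` and the example shape are illustrative only, not part of the commit:

```python
import torch

def split_patch_embed(state_dict: dict) -> dict:
    r = {}
    for name, tensor in state_dict.items():
        if name == 'visual.patch_embed.proj.weight':
            # assumed Conv3d kernel layout: (out_channels, in_channels, temporal=2, patch_h, patch_w)
            assert tensor.ndim == 5 and tensor.shape[2] == 2
            # keep one 4-D projection weight per temporal frame instead of flattening everything
            r[name.replace('proj.weight', 'proj.0.weight')] = tensor[:, :, 0, :, :]
            r[name.replace('proj.weight', 'proj.1.weight')] = tensor[:, :, 1, :, :]
        else:
            r[name] = tensor
    return r

# e.g.: split_patch_embed({'visual.patch_embed.proj.weight': torch.randn(1280, 3, 2, 14, 14)})
```

Presumably this lets the runtime apply each temporal frame as an ordinary 2-D projection rather than a fused 5-D kernel, which also explains the two new entries in `get_weight_names`.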

docs/models.md

Lines changed: 3 additions & 1 deletion
```diff
@@ -361,8 +361,10 @@ Please use `--format completion` for these models.
   * `native_resolution`: use native resolution or not, default: `false` (This seems sensitive to quantization, so defaults to `false`).
   * `fps`: Default 1.0.
 
-* Qwen (`Qwen2AudioForConditionalGeneration`)
+* Qwen (`Qwen2AudioForConditionalGeneration`, `Qwen2_5_VLForConditionalGeneration`)
   * [x] Qwen2-Audio: [7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct/tree/0a095220c30b7b31434169c3086508ef3ea5bf0a)
+  * [x] Qwen2.5-VL: [3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct/tree/66285546d2b821cf421d4f5eb2576359d3770cd3), [7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct/tree/cc594898137f460bfe9f0759e9844b3ce807cfb5)
+  * [x] MiMo-VL: [7B-RL](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-RL/tree/460c34be0c6cfe79b6b311647ae9112784f80b73), [7B-RL-2508](https://huggingface.co/XiaomiMiMo/MiMo-VL-7B-RL-2508/tree/4bfb270765825d2fa059011deb4c96fdd579be6f)
 
 * SmolVLM2 (`SmolVLMForConditionalGeneration`)
   * [x] [2.2B-Instruct](https://huggingface.co/HuggingFaceTB/SmolVLM2-2.2B-Instruct/tree/482adb537c021c86670beed01cd58990d01e72e4)
```

models/llama.h

Lines changed: 2 additions & 2 deletions
```diff
@@ -380,11 +380,11 @@ namespace chatllm::llama::v4
         }
     };
 
-    class LlamaNormedSelfAttention : public QKNormedAttention<L2Norm, BaseAttention>
+    class LlamaNormedSelfAttention : public QKNormedAttention<L2NormInplace, BaseAttention>
    {
    public:
        LlamaNormedSelfAttention(InitContext *ctx, int hidden_size, int num_attention_heads, int num_kv_heads, int head_dim, int max_length)
-            : QKNormedAttention<L2Norm, BaseAttention>(ctx, hidden_size, num_attention_heads, num_kv_heads, head_dim, max_length, false, false)
+            : QKNormedAttention<L2NormInplace, BaseAttention>(ctx, hidden_size, num_attention_heads, num_kv_heads, head_dim, max_length, false, false)
        {
            post_norm = true;
        }
```
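Note: the only change here is the norm functor passed to `QKNormedAttention`, i.e. which L2-normalization variant is applied to the per-head query/key vectors. A rough sketch of the operation itself (plain Python, not the ggml API; the tensor layout and `eps` value are assumptions):

```python
import torch

def l2_norm(x: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    # scale the last dimension (head_dim) to unit L2 length
    return x / torch.sqrt((x * x).sum(dim=-1, keepdim=True) + eps)

q = torch.randn(2, 8, 16, 64)  # (batch, heads, seq, head_dim) -- assumed layout
print(torch.linalg.vector_norm(l2_norm(q), dim=-1)[0, 0, 0])  # ~= 1.0
```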

models/moonshot.cpp

Lines changed: 5 additions & 3 deletions
```diff
@@ -251,26 +251,28 @@ namespace chatllm::kimi::vit
         void before_forward(ComputeContext *ctx, const int n_past, const int qlen) override
         {
             const int len = grid_h * grid_w;
+            std::vector<int> v_pos_w;
             std::vector<int> v_pos_h;
 
             CHATLLM_CHECK(len <= max_length);
 
             ggml::set_dim(pos, 0, len);
             ggml::set_dim(pos_h, 0, len);
 
+            v_pos_w.resize(len);
             v_pos_h.resize(len);
 
             for (int i = 0; i < grid_h; i++)
             {
                 for (int j = 0; j < grid_w; j++)
                 {
-                    v_pos  [i * grid_w + j] = j;
+                    v_pos_w[i * grid_w + j] = j;
                     v_pos_h[i * grid_w + j] = i;
                 }
             }
 
-            Backend::write_tensor_data(pos, v_pos.data(), 0, len * sizeof(v_pos[0]));
-            Backend::write_tensor_data(pos_h, v_pos_h.data(), 0, len * sizeof(v_pos[0]));
+            Backend::write_tensor_data(pos, v_pos_w.data(), 0, len * sizeof(v_pos_w[0]));
+            Backend::write_tensor_data(pos_h, v_pos_h.data(), 0, len * sizeof(v_pos_h[0]));
         }
 
         ggml::tensor *apply_2d_rope(ComputeContext *ctx, ggml::tensor *hidden, int hidden_size, ggml::tensor *pos_w, ggml::tensor *pos_h) const
```
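Note: this fix fills the column indices into a fresh local `v_pos_w` instead of writing through the previously shared `v_pos` buffer, so the width and height position tables uploaded for the 2-D RoPE stay independent. A small sketch of the grids that loop produces (NumPy, illustrative only; `make_2d_positions` is a hypothetical helper, not code from the repo):

```python
import numpy as np

def make_2d_positions(grid_h: int, grid_w: int):
    # one entry per patch, row-major over the (grid_h x grid_w) image grid
    pos_w = np.empty(grid_h * grid_w, dtype=np.int32)
    pos_h = np.empty(grid_h * grid_w, dtype=np.int32)
    for i in range(grid_h):
        for j in range(grid_w):
            pos_w[i * grid_w + j] = j   # column index -> `pos`
            pos_h[i * grid_w + j] = i   # row index    -> `pos_h`
    return pos_w, pos_h

# make_2d_positions(2, 3) -> ([0, 1, 2, 0, 1, 2], [0, 0, 0, 1, 1, 1])
```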
