Skip to content

Commit 7e1bb04

Browse files
committed
remove attn_window_size from gguf
1 parent 77b144a commit 7e1bb04

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

examples/llava/clip.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1720,8 +1720,7 @@ struct clip_model_loader {
17201720
get_u32(KEY_IMAGE_SIZE, hparams.image_size);
17211721
get_u32(KEY_PATCH_SIZE, hparams.patch_size);
17221722
get_u32(KEY_IMAGE_CROP_RESOLUTION, hparams.image_crop_resolution, false);
1723-
get_u32(KEY_ATTN_WINDOW_SIZE, hparams.attn_window_size, false);
1724-
get_u32(KEY_WIN_ATTN_PATTERN, hparams.n_wa_pattern, false);
1723+
get_u32(KEY_WIN_ATTN_PATTERN, hparams.n_wa_pattern, ctx_clip.proj_type == PROJECTOR_TYPE_QWEN2_5_VL);
17251724
get_arr_int(KEY_IMAGE_GRID_PINPOINTS, hparams.image_grid_pinpoints, false);
17261725

17271726
{
@@ -3210,12 +3209,13 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
32103209
std::vector<int> idx(ph * pw);
32113210
std::vector<int> inv_idx(ph * pw);
32123211

3213-
if (hparams.attn_window_size > 0) {
3212+
if (use_window_attn) {
3213+
const int attn_window_size = 112;
32143214
struct ggml_tensor * window_idx = ggml_graph_get_tensor(gf, "window_idx");
32153215
struct ggml_tensor * inv_window_idx = ggml_graph_get_tensor(gf, "inv_window_idx");
32163216
struct ggml_tensor * window_mask = ggml_graph_get_tensor(gf, "window_mask");
32173217

3218-
const int grid_window = hparams.attn_window_size / patch_size / merge_ratio;
3218+
const int grid_window = attn_window_size / patch_size / merge_ratio;
32193219
int dst = 0;
32203220
// [num_vision_tokens, num_vision_tokens] attention mask tensor
32213221
std::vector<float> mask(pow(ipw * iph, 2), std::numeric_limits<float>::lowest());
@@ -3342,9 +3342,10 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
33423342
struct ggml_tensor * window_mask = ggml_graph_get_tensor(gf, "window_mask");
33433343

33443344
const int merge_ratio = 2;
3345+
const int attn_window_size = 112;
33453346
const int pw = image_size_width / patch_size / merge_ratio;
33463347
const int ph = image_size_height / patch_size / merge_ratio;
3347-
const int grid_window = hparams.attn_window_size / patch_size / merge_ratio;
3348+
const int grid_window = attn_window_size / patch_size / merge_ratio;
33483349
const int ipw = image_size_width / patch_size;
33493350
const int iph = image_size_height / patch_size;
33503351
/*

examples/llava/qwen2_vl_surgery.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ def main(args):
167167

168168
if args.model_type == "qwen2.5vl":
169169
fout.add_uint32("clip.vision.n_wa_pattern", get_n_wa_pattern(vcfg.fullatt_block_indexes))
170-
fout.add_uint32("clip.vision.window_size", vcfg.window_size)
171170
fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), vcfg.hidden_size)
172171
fout.add_uint32("clip.vision.projection_dim", vcfg.out_hidden_size)
173172
fout.add_string("clip.projector_type", "qwen2.5vl_merger")

0 commit comments

Comments (0)