Skip to content

Commit 8bce66d

Browse files
committed
clip: fixed warnings
1 parent 1268dc3 commit 8bce66d

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

tools/mtmd/clip.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -667,9 +667,9 @@ struct clip_graph {
667667
constexpr int _depth = 12;
668668
constexpr int enc_n_heads = 12;
669669
constexpr int enc_d_heads = enc_n_embd / enc_n_heads;
670-
constexpr int _prompt_n_embd = 256;
670+
// constexpr int _prompt_n_embd = 256;
671671
constexpr int enc_patch_size = 16;
672-
constexpr int _window_size = 14;
672+
// constexpr int _window_size = 14;
673673

674674
const int enc_n_patches = enc_image_size / enc_patch_size; // 64
675675

@@ -834,7 +834,7 @@ struct clip_graph {
834834

835835
ggml_tensor * global_features_1 = build_sam_enc(inp_raw, std::max(img.nx, img.ny));
836836

837-
ggml_tensor * global_features_2 = build_dp_ocr_clip(inp_raw, global_features_1);
837+
ggml_tensor * global_features_2 = build_dp_ocr_clip(global_features_1);
838838

839839
// torch global_features = torch.cat((global_features_2[:, 1:], global_features_1.flatten(2).permute(0, 2, 1)), dim=-1)
840840
global_features_1 = ggml_permute(ctx0, global_features_1,2,1,0,3);
@@ -1532,7 +1532,7 @@ struct clip_graph {
15321532
return gf;
15331533
}
15341534

1535-
ggml_tensor * build_dp_ocr_clip(ggml_tensor * inpL, ggml_tensor * patch_embeds) {
1535+
ggml_tensor * build_dp_ocr_clip(ggml_tensor * patch_embeds) {
15361536
GGML_ASSERT(model.class_embedding != nullptr);
15371537
GGML_ASSERT(model.position_embeddings != nullptr);
15381538

@@ -2466,6 +2466,8 @@ struct clip_graph {
24662466
return inpL;
24672467
}
24682468

2469+
// Implementation based on approach suggested by Acly
2470+
// See: https://github.com/ggml-org/llama.cpp/pull/17383#issuecomment-3554227091
24692471
static ggml_tensor* window_partition(ggml_context* ctx, ggml_tensor* x, int window) {
24702472
auto [c, w, h, b] = x->ne;
24712473
// same as
@@ -2486,6 +2488,8 @@ struct clip_graph {
24862488
return x;
24872489
}
24882490

2491+
// Implementation based on approach suggested by Acly
2492+
// See: https://github.com/ggml-org/llama.cpp/pull/17383#issuecomment-3554227091
24892493
static ggml_tensor* window_unpartition(ggml_context* m, ggml_tensor* x, int w, int h, int window) {
24902494
int64_t c = x->ne[0];
24912495
// same as
@@ -4881,7 +4885,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
48814885
const int min_num = 2;
48824886
const int max_num = 9;
48834887
const int image_size = params.image_size; // typically 640
4884-
const bool use_thumbnail = true; // mimic python's use_thumbnail
4888+
// const bool use_thumbnail = true; // mimic python's use_thumbnail
48854889

48864890
// original image size
48874891
const int orig_w = original_size.width;

0 commit comments

Comments (0)