Skip to content

Commit 06145bf

Browse files
committed
small clean up
1 parent fa4fed7 commit 06145bf

File tree

2 files changed: +2 −20 lines changed (2 additions, 20 deletions)

tools/mtmd/clip.cpp

Lines changed: 1 addition & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1667,27 +1667,9 @@ struct clip_graph {
1667 1667   inpL = cur;
1668 1668   }
1669 1669
1670      - // TODO @ngxson : find a way to move this output of this function
     1670 + // TODO @ngxson : find a way to move this outside
1671 1671   if (ctx->proj_type == PROJECTOR_TYPE_QWEN2A) {
1672 1672   ggml_tensor * cur = inpL;
1673      - // trick: use sum_rows and ggml_scale instead of ggml_pool_1d
1674      - // because ggml_pool_1d is not supported on some GPU backend
1675      - // add padding if number of frames is not divisible by 2
1676      - /*
1677      - if (cur->ne[1] % 2 != 0) {
1678      - cur = ggml_pad(ctx0, cur, 0, 1, 0, 0);
1679      - }
1680      - cur = ggml_reshape_3d(ctx0, cur, cur->ne[0], 2, cur->ne[1]/2); // [n_embd, 2, n_frames/2]
1681      - cur = ggml_transpose(ctx0, cur); // [2, n_embd, n_frames/2]
1682      - // calc mean value
1683      - {
1684      - cur = ggml_cast(ctx0, cur, GGML_TYPE_F32);
1685      - cur = ggml_sum_rows(ctx0, cur); // [1, n_embd, n_frames/2]
1686      - cur = ggml_scale(ctx0, cur, 0.5f);
1687      - }
1688      - cur = ggml_transpose(ctx0, cur); // [n_embd, 1, n_frames/2]
1689      - cur = ggml_reshape_2d(ctx0, cur, cur->ne[0], cur->ne[2]); // [n_embd, n_frames/2]
1690      - */
1691 1673   cur = ggml_transpose(ctx0, cur);
1692 1674   cur = ggml_cast(ctx0, cur, GGML_TYPE_F32);
1693 1675   cur = ggml_pool_1d(ctx0, cur, GGML_OP_POOL_AVG, 2, 2, 0);

tools/mtmd/mtmd.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,7 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
335 335   string_replace_all(prompt_modified, ctx->media_marker, marker_modified);
336 336
337 337   } else if (proj_type == PROJECTOR_TYPE_QWEN2A) {
338     - // <|audio_bos|> ... (image embeddings) ... <|audio_eos|>
    338 + // <|audio_bos|> ... (embeddings) ... <|audio_eos|>
339 339   marker_modified = "<|audio_bos|>" + ctx->media_marker + "<|audio_eos|>";
340 340   string_replace_all(prompt_modified, ctx->media_marker, marker_modified);
341 341

0 commit comments

Comments
 (0)