File tree Expand file tree Collapse file tree 2 files changed +10
-10
lines changed Expand file tree Collapse file tree 2 files changed +10
-10
lines changed Original file line number Diff line number Diff line change @@ -585,29 +585,28 @@ int32_t mtmd_helper_decode_image_chunk(
585585 mtmd_context * ctx,
586586 struct llama_context * lctx,
587587 const mtmd_input_chunk * chunk,
588- float * embd ,
588+ float * encoded_embd ,
589589 llama_pos n_past,
590590 llama_seq_id seq_id,
591591 int32_t n_batch,
592592 llama_pos * new_n_past) {
593-
594- if (chunk->type != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
593+ if (mtmd_input_chunk_get_type (chunk) != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
595594 LOG_ERR (" failed to decode image chunk: input chunk not of image type\n " );
596595 return -1 ;
597596 }
598- if (!chunk->tokens_image ) {
597+ const auto image_tokens = mtmd_input_chunk_get_tokens_image (chunk);
598+ if (!image_tokens) {
599599 LOG_ERR (" failed to decode image chunk: image tokens are null\n " );
600600 return -1 ;
601601 }
602- const auto image_tokens = chunk->tokens_image .get ();
603602
604603 int n_mmproj_embd = clip_n_mmproj_embd (ctx->ctx_clip );
605604 int n_pos_per_embd = mtmd_decode_use_mrope (ctx) ? 4 : 1 ;
606605
607606 int32_t n_tokens = mtmd_image_tokens_get_n_tokens (image_tokens);
608607 int32_t i_batch = 0 ;
609608 int32_t n_img_batches = GGML_PAD (n_tokens, n_batch) / n_batch;
610- decode_embd_batch batch_embd (embd , n_tokens, n_pos_per_embd, n_mmproj_embd);
609+ decode_embd_batch batch_embd (encoded_embd , n_tokens, n_pos_per_embd, n_mmproj_embd);
611610
612611 const int nx = mtmd_image_tokens_get_nx (image_tokens);
613612 const int ny = mtmd_image_tokens_get_ny (image_tokens);
Original file line number Diff line number Diff line change @@ -232,15 +232,16 @@ MTMD_API int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
232232 llama_pos * new_n_past);
233233
234234// helper function to decode an image whose embeddings have already been calculated
235+ // this helper handles batching and the pre-/post-decode setup (e.g. Gemma 3 requires non-causal attention)
235236// returns 0 on success, -1 if the chunk is not a valid image chunk, 1 on decode failure
236- MTMD_API int32_t mtmd_helper_decode_image_chunk (mtmd_context *ctx,
237- struct llama_context *lctx,
237+ MTMD_API int32_t mtmd_helper_decode_image_chunk (mtmd_context * ctx,
238+ struct llama_context * lctx,
238239 const mtmd_input_chunk * chunk,
239- float *embd ,
240+ float * encoded_embd ,
240241 llama_pos n_past,
241242 llama_seq_id seq_id,
242243 int32_t n_batch,
243- llama_pos *new_n_past);
244+ llama_pos * new_n_past);
244245
245246// ///////////////////////////////////////
246247
You can’t perform that action at this time.
0 commit comments