@@ -150,8 +150,9 @@ int32_t mtmd_helper_decode_image_chunk(
150150 int32_t n_batch,
151151 llama_pos * new_n_past) {
152152 auto chunk_type = mtmd_input_chunk_get_type (chunk);
153+ const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? " image" : " audio" ;
153154 if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
154- LOG_ERR (" failed to decode image chunk: input chunk not of image/audio type\n " );
155+ LOG_ERR (" failed to decode chunk: input chunk not of image/audio type\n " );
155156 return -1 ;
156157 }
157158
@@ -166,8 +167,12 @@ int32_t mtmd_helper_decode_image_chunk(
166167
167168 if (mtmd_decode_use_mrope (ctx)) {
168169 const auto image_tokens = mtmd_input_chunk_get_tokens_image (chunk);
170+ if (chunk_type != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
171+ LOG_ERR (" failed to decode chunk: M-RoPE only accepts image chunk\n " );
172+ return -1 ;
173+ }
169174 if (!image_tokens) {
170- LOG_ERR (" failed to decode image chunk: image tokens are null\n " );
175+ LOG_ERR (" failed to decode chunk: image tokens are null\n " );
171176 return -1 ;
172177 }
173178 const int nx = mtmd_image_tokens_get_nx (image_tokens);
@@ -187,17 +192,17 @@ int32_t mtmd_helper_decode_image_chunk(
187192 int n_tokens_batch = std::min (n_batch, n_tokens - pos_offset);
188193 llama_batch batch_embd_view = batch_embd.get_view (pos_offset, n_tokens_batch);
189194
190- LOG_INF (" decoding image batch %d/%d, n_tokens_batch = %d\n " , i_batch+1 , n_img_batches, n_tokens_batch);
195+ LOG_INF (" decoding %s batch %d/%d, n_tokens_batch = %d\n " , name , i_batch+1 , n_img_batches, n_tokens_batch);
191196
192197 int64_t t1 = ggml_time_ms ();
193198 int32_t ret = llama_decode (lctx, batch_embd_view);
194199 if (ret != 0 ) {
195- LOG_ERR (" failed to decode image \n " );
200+ LOG_ERR (" failed to decode %s \n " , name );
196201 llama_set_causal_attn (lctx, true ); // restore causal attn
197202 return ret;
198203 }
199204
200- LOG_INF (" image decoded (batch %d/%d) in %" PRId64 " ms\n " , i_batch+1 , n_img_batches, ggml_time_ms () - t1);
205+ LOG_INF (" %s decoded (batch %d/%d) in %" PRId64 " ms\n " , name , i_batch+1 , n_img_batches, ggml_time_ms () - t1);
201206
202207 i_batch++;
203208 }
@@ -259,7 +264,7 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
259264
260265 ret = mtmd_encode_chunk (ctx, chunk);
261266 if (ret != 0 ) {
262- LOG_ERR (" failed to encode image \n " );
267+ LOG_ERR (" failed to encode %s slice \n " , name );
263268 llama_batch_free (text_batch);
264269 return ret;
265270 }
0 commit comments