You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
fprintf(stderr, "%s: mmproj embedding mismatch between Audio and Vision (%d and %d)! Make sure you use the correct mmproj file!\n", __func__,n_embd_clip, n_embd_a);
2444
+
int n_embd_clip_a = -1;
2445
+
int n_embd_clip_v = -1;
2446
+
if (clp_ctx_v)
2447
+
{
2448
+
n_embd_clip_v = clip_n_mmproj_embd(clp_ctx_v);
2449
+
if (n_embd_clip_v != n_embd_llm) {
2450
+
fprintf(stderr, "%s: mmproj vision embedding mismatch (%d and %d)! Make sure you use the correct mmproj file!\n", __func__,n_embd_clip_v, n_embd_llm);
2451
+
return ModelLoadResult::FAIL;
2452
+
}
2453
+
}
2454
+
if (clp_ctx_a)
2455
+
{
2456
+
n_embd_clip_a = clip_n_mmproj_embd(clp_ctx_a);
2457
+
if (n_embd_clip_a != n_embd_llm) {
2458
+
fprintf(stderr, "%s: mmproj audio embedding mismatch (%d and %d)! Make sure you use the correct mmproj file!\n", __func__,n_embd_clip_a, n_embd_llm);
2449
2459
return ModelLoadResult::FAIL;
2450
2460
}
2451
2461
}
2452
-
if (n_embd_clip != n_embd_llm) {
2453
-
fprintf(stderr, "%s: mmproj embedding mismatch (%d and %d)! Make sure you use the correct mmproj file!\n", __func__,n_embd_clip, n_embd_llm);
2462
+
if (clp_ctx_v && clp_ctx_a && n_embd_clip_v != n_embd_clip_a) {
2463
+
fprintf(stderr, "%s: mmproj embedding mismatch between Audio and Vision (%d and %d)! Make sure you use the correct mmproj file!\n", __func__,n_embd_clip_v, n_embd_clip_a);
2454
2464
return ModelLoadResult::FAIL;
2455
2465
}
2466
+
2456
2467
if(clp_ctx_a) //init audio
2457
2468
{
2458
2469
if (clip_has_whisper_encoder(clp_ctx_a)) {
@@ -2473,9 +2484,9 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
2473
2484
{
2474
2485
printf("Error: Speculative decoding cannot be used with Recurrent models!\n");
2475
2486
}
2476
-
elseif(clp_ctx_v!=nullptr)
2487
+
elseif(clp_ctx_v!=nullptr || clp_ctx_a!=nullptr)
2477
2488
{
2478
-
printf("Error: Speculative decoding cannot be used with multimodal vision projectors!\n");
2489
+
printf("Error: Speculative decoding cannot be used with multimodal projectors!\n");
printf("\nWarning: Audio Embd excluded - Context size too low or not enough clip tokens! (needed %d)\nAudio will be IGNORED! You probably want to relaunch with a larger context size!\n",cliptokensneeded);
3116
3127
}
3117
3128
3129
+
}else{
3130
+
printf("\nUnhandled media object, something went wrong.\n");
0 commit comments