You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
printf("WARNING: Draft model vocab of (%d) does not match base vocab of (%d).\nIn debug mode, this restriction is bypassed. However, speculative decoding may malfunction!\n",draftvocab,base_n_vocab);
printf("WARNING: Draft model vocab of (%d) does not match base vocab of (%d).\nIn Croco.Cpp, a tolerance of +/- 512 tokens is allowed to account for some variations between the base models and their finetunes/updates and other self-merged frankenmodels + eventual finetunes of those.\nHowever, speculative decoding may malfuction in such cases if the difference between their vocab/tokenizers is too big!\n",draftvocab,base_n_vocab);
606
+
printf("WARNING: Draft model vocab of (%d) does not match base vocab of (%d).\nIn Croco.Cpp, a tolerance of +/- 128 tokens is allowed to account for some variations between the base models and their finetunes/updates and other self-merged frankenmodels + eventual finetunes of those.\nHowever, speculative decoding may malfuction in such cases if the difference between their vocab/tokenizers is too big!\n",draftvocab,base_n_vocab);
607
607
}
608
608
else
609
609
{
610
-
printf("Error: Draft model vocab of (%d) does not match base vocab of (%d), or is above 512 tokens of difference. Speculative decoding cannot be used!\n",draftvocab,base_n_vocab);
611
-
printf("If you REALLY want to override this, run in --debugmode and this restriction will be completely disabled. However, you might encounter unwanted results!\n");
612
-
llama_free(draft_ctx);
613
-
draft_ctx = nullptr;
610
+
int diff = abs(draftvocab-base_n_vocab);
611
+
if(diff <= 256)
612
+
{
613
+
//allow small differences to work
614
+
printf("WARNING: Draft model vocab of (%d) does not match base vocab of (%d).\nIn KoboldCpp, a tolerance of +/- 256 tokens is allowed.\nSpeculative decoding may malfunction!\n",draftvocab,base_n_vocab);
615
+
} else {
616
+
printf("Error: Draft model vocab of (%d) is too different from base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab);
617
+
printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n");
0 commit comments