Add support for Microsoft Phi-4 model #10817
Changes from 1 commit
Commits in this pull request: 7555ab1, 520e8a0, c7fdbd3, 046c0d7
```diff
@@ -5807,7 +5807,7 @@ static void llm_load_hparams(
                     hparams.n_swa = 131072;
                 }
                 bool found_swa = ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false);
-                if (!found_swa && hparams.n_swa == 0) {
+                if (!found_swa && hparams.n_swa == 0 && model.name != "Phi 4") {
                     throw std::runtime_error("invalid value for sliding_window");
                 }
             } break;
@@ -12839,7 +12839,13 @@ struct llm_build_context {
         struct ggml_tensor * inp_pos = build_inp_pos();

         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
-        struct ggml_tensor * KQ_mask_swa = build_inp_KQ_mask_swa();
+        struct ggml_tensor * KQ_mask = nullptr;
+        if (model.name == "Phi 4") {
+            // Phi-4 doesn't use sliding window attention
+            KQ_mask = build_inp_KQ_mask();
+        } else {
+            KQ_mask = build_inp_KQ_mask_swa();
+        }

         for (int il = 0; il < n_layer; ++il) {
             auto residual = inpL;
@@ -12897,7 +12903,7 @@ struct llm_build_context {

             cur = llm_build_kv(ctx0, lctx, kv_self, gf,
                     model.layers[il].wo, model.layers[il].bo,
-                    Kcur, Vcur, Qcur, KQ_mask_swa, n_tokens, kv_head, n_kv, 1.0f, cb, il);
+                    Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, 1.0f, cb, il);
         }

         if (il == n_layer - 1) {
```
Alternatively, `self._set_vocab_gpt2()` could be called when `tokenizer.model` is missing here, regardless of the model name.
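A minimal sketch of that suggestion, assuming the converter's usual layout: the helper name below is hypothetical, and `_set_vocab_sentencepiece()` is assumed as the SentencePiece counterpart to `_set_vocab_gpt2()` rather than quoted from this PR.

```python
from pathlib import Path

def has_sentencepiece_model(dir_model: str) -> bool:
    # Hypothetical helper: key the vocab choice on whether a SentencePiece
    # tokenizer.model file exists, not on the model name.
    return (Path(dir_model) / "tokenizer.model").is_file()

# Inside the converter's set_vocab() this would read roughly:
#     if not has_sentencepiece_model(self.dir_model):
#         self._set_vocab_gpt2()
#     else:
#         self._set_vocab_sentencepiece()
```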
I modified the solution to check the value of `tokenizer_class` from `tokenizer_config.json` and call `self._set_vocab_gpt2()` if it is `GPT2Tokenizer`.
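A rough sketch of that check under the same assumptions (hypothetical helper name and `_set_vocab_sentencepiece()` fallback; the real change lives inside the model's `set_vocab()` in the conversion script):

```python
import json
from pathlib import Path

def uses_gpt2_tokenizer(dir_model: str) -> bool:
    # Hypothetical helper: read tokenizer_class from tokenizer_config.json and
    # treat GPT2Tokenizer as the BPE/GPT-2 vocab path.
    cfg_path = Path(dir_model) / "tokenizer_config.json"
    if not cfg_path.is_file():
        return False
    config = json.loads(cfg_path.read_text(encoding="utf-8"))
    return config.get("tokenizer_class") == "GPT2Tokenizer"

# In the converter's set_vocab() this would drive the choice of vocab loader:
#     if uses_gpt2_tokenizer(self.dir_model):
#         self._set_vocab_gpt2()
#     else:
#         self._set_vocab_sentencepiece()
```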