We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c64d2be, commit 12c2bdf (Copy full SHA for 12c2bdf)
examples/server/server.cpp
@@ -1772,6 +1772,9 @@ struct server_context {
1772
// force F16 KV cache for the draft model for extra performance
1773
cparams_dft.type_k = GGML_TYPE_F16;
1774
cparams_dft.type_v = GGML_TYPE_F16;
1775
+
1776
+ // the context is not needed - we will create one for each slot
1777
+ llama_init_dft.context.reset();
1778
}
1779
1780
chat_templates = common_chat_templates_from_model(model, params_base.chat_template);
0 commit comments