@@ -4471,7 +4471,6 @@ using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
 
 static size_t llama_model_max_nodes(const llama_model & model) {
     return std::max<size_t>(8192, model.tensors_by_name.size()*5);
-
 }
 
 struct llama_model_loader {
@@ -10231,8 +10230,7 @@ struct llm_build_context {
         llama_context & lctx,
         const llama_ubatch & batch,
         const llm_build_cb & cb,
-        bool worst_case,
-        bool warmup) :
+        bool worst_case) :
         model (lctx.model),
         lctx (lctx),
         hparams (model.hparams),
@@ -10250,7 +10248,7 @@ struct llm_build_context {
         n_embd_head_v (hparams.n_embd_head_v),
         n_embd_v_gqa (hparams.n_embd_v_gqa()),
         n_expert (hparams.n_expert),
-        n_expert_used (warmup ? hparams.n_expert : hparams.n_expert_used),
+        n_expert_used (hparams.n_expert_used),
         freq_base (cparams.rope_freq_base),
         freq_scale (cparams.rope_freq_scale),
         ext_factor (cparams.yarn_ext_factor),
@@ -16054,7 +16052,7 @@ static struct ggml_cgraph * llama_build_graph_defrag(llama_context & lctx, const
 
     llm_build_cb cb = [&](struct ggml_tensor * , const char * , int ) { };
 
-    struct llm_build_context llm(lctx, dummy, cb, false, false);
+    struct llm_build_context llm(lctx, dummy, cb, false);
 
     llm.init();
 
@@ -16071,7 +16069,7 @@ static struct ggml_cgraph * llama_build_graph_k_shift(llama_context & lctx) {
 
     llm_build_cb cb = [&](struct ggml_tensor * , const char * , int ) { };
 
-    struct llm_build_context llm(lctx, dummy, cb, false, false);
+    struct llm_build_context llm(lctx, dummy, cb, false);
 
     llm.init();
 
@@ -16088,7 +16086,7 @@ static struct ggml_cgraph * llama_build_graph_k_shift(llama_context & lctx) {
 
     // llm_build_cb cb = [&](struct ggml_tensor * , const char * , int ) { };
 
-    // struct llm_build_context llm(lctx, dummy, cb, false, false);
+    // struct llm_build_context llm(lctx, dummy, cb, false);
 
     // llm.init();
 
@@ -16142,11 +16140,7 @@ static struct ggml_cgraph * llama_build_graph(
 
     struct ggml_cgraph * result = NULL;
 
-    const llama_vocab * vocab = llama_get_vocab(&lctx);
-    llama_token bos = llama_token_bos_impl(*vocab);
-    llama_token eos = llama_token_eos_impl(*vocab);
-    bool is_warming_up = (batch.n_tokens == 1 && batch.token[0] == bos);
-    struct llm_build_context llm(lctx, batch, cb, worst_case, is_warming_up);
+    struct llm_build_context llm(lctx, batch, cb, worst_case);
 
     llm.init();
 
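
For context on what is being reverted: the removed code treated a batch consisting of a single BOS token as a warmup pass and, in that case, routed through all experts rather than the configured `n_expert_used`. Below is a minimal standalone sketch of that heuristic, reconstructed from the removed lines; the types and helpers are simplified stand-ins for illustration, not the actual llama.cpp API.

```cpp
// Sketch of the reverted warmup heuristic, reconstructed from the removed
// lines above. Types here are simplified stand-ins, not llama.cpp's own.
#include <cstdint>

using llama_token = int32_t;

struct hparams_t {
    uint32_t n_expert      = 8; // total experts in the MoE model
    uint32_t n_expert_used = 2; // experts normally activated per token
};

struct batch_t {
    int32_t             n_tokens; // number of tokens in the batch
    const llama_token * token;    // token ids
};

// The removed code flagged a batch holding exactly one BOS token as warmup.
static bool is_warming_up(const batch_t & batch, llama_token bos) {
    return batch.n_tokens == 1 && batch.token[0] == bos;
}

// During warmup, all experts were selected (presumably so that every
// expert's weights get touched once); normal decoding used n_expert_used.
static uint32_t experts_for_pass(const hparams_t & hp, bool warmup) {
    return warmup ? hp.n_expert : hp.n_expert_used;
}
```

The fragility of this approach is what the revert removes: warmup was inferred from batch contents, so any user batch that happened to be a lone BOS token would also have activated every expert.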