@@ -20,7 +20,7 @@ llama_context::llama_context(
         const llama_model & model,
               llama_context_params params) :
     model(model),
-    batch_allocr(std::make_unique<llama_batch_allocr>()) {
+    balloc(std::make_unique<llama_batch_allocr>()) {
     LLAMA_LOG_INFO("%s: constructing llama_context\n", __func__);

     t_start_us = model.t_start_us;
@@ -734,14 +734,14 @@ int llama_context::encode(const llama_batch & batch_inp) {
     const int64_t n_embd = hparams.n_embd;

     // note: during encode, we always pass the full sequence starting from pos = 0
-    if (!batch_allocr->init(batch_inp, model.vocab, nullptr, n_embd, true)) {
+    if (!balloc->init(batch_inp, model.vocab, nullptr, n_embd, true)) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
         return -1;
     }

-    const uint32_t n_tokens = batch_allocr->get_n_tokens();
+    const uint32_t n_tokens = balloc->get_n_tokens();

-    const llama_ubatch ubatch = batch_allocr->split_simple(n_tokens);
+    const llama_ubatch ubatch = balloc->split_simple(n_tokens);

     // micro-batching is not possible for non-causal encoding, so we process the batch in a single shot
     GGML_ASSERT(cparams.n_ubatch >= n_tokens && "encoder requires n_ubatch >= n_tokens");
@@ -859,7 +859,7 @@ int llama_context::encode(const llama_batch & batch_inp) {
         cross.v_embd.resize(cross.n_embd*cross.n_enc);
         memcpy(cross.v_embd.data(), embd, ggml_nbytes(t_embd));

-        const auto & batch = batch_allocr->get_batch();
+        const auto & batch = balloc->get_batch();

         // remember the sequence ids used during the encoding - needed for cross attention later
         cross.seq_ids_enc.resize(n_tokens);
@@ -897,13 +897,13 @@ int llama_context::decode(const llama_batch & batch_inp) {
     // when computing embeddings, all tokens are output
     const bool output_all = cparams.embeddings;

-    if (!batch_allocr->init(batch_inp, vocab, memory.get(), n_embd, output_all)) {
+    if (!balloc->init(batch_inp, vocab, memory.get(), n_embd, output_all)) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
         return -1;
     }

-    const uint32_t n_tokens_all  = batch_allocr->get_n_tokens();
-    const uint32_t n_outputs_all = batch_allocr->get_n_outputs();
+    const uint32_t n_tokens_all  = balloc->get_n_tokens();
+    const uint32_t n_outputs_all = balloc->get_n_outputs();

     if (output_all) {
         // require that all tokens are output
@@ -934,7 +934,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
     llama_memory_state_ptr mstate;

     while (true) {
-        mstate = memory->init_batch(batch_allocr.get(), cparams.n_ubatch, output_all);
+        mstate = memory->init_batch(*balloc, cparams.n_ubatch, output_all);
         if (!mstate) {
             return -2;
         }
@@ -955,19 +955,19 @@ int llama_context::decode(const llama_batch & batch_inp) {
                         did_optimize = true;

                         if (kv_self_update(true)) {
-                            LLAMA_LOG_DEBUG("%s: retrying batch size %d after cache optimization\n", __func__, batch_allocr->get_n_tokens());
+                            LLAMA_LOG_DEBUG("%s: retrying batch size %d after cache optimization\n", __func__, balloc->get_n_tokens());

                             continue;
                         }
                     }

-                    LLAMA_LOG_WARN("%s: failed to find a memory slot for batch of size %d\n", __func__, batch_allocr->get_n_tokens());
+                    LLAMA_LOG_WARN("%s: failed to find a memory slot for batch of size %d\n", __func__, balloc->get_n_tokens());

                     return 1;
                 }
             case LLAMA_MEMORY_STATUS_FAILED_COMPUTE:
                 {
-                    LLAMA_LOG_ERROR("%s: compute failed while preparing batch of size %d\n", __func__, batch_allocr->get_n_tokens());
+                    LLAMA_LOG_ERROR("%s: compute failed while preparing batch of size %d\n", __func__, balloc->get_n_tokens());

                     return -2;
                 }
@@ -1133,7 +1133,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
     if (n_outputs > 0) {
         bool sorted_output = true;

-        auto & out_ids = batch_allocr->get_out_ids();
+        auto & out_ids = balloc->get_out_ids();

         GGML_ASSERT(out_ids.size() == (size_t) n_outputs);

@@ -1306,8 +1306,8 @@ ggml_cgraph * llama_context::graph_reserve(uint32_t n_tokens, uint32_t n_seqs, u

     this->n_outputs = n_outputs;

-    llama_batch_allocr batch_allocr;
-    llama_ubatch ubatch = batch_allocr.ubatch_reserve(n_tokens/n_seqs, n_seqs);
+    llama_batch_allocr balloc;
+    llama_ubatch ubatch = balloc.ubatch_reserve(n_tokens/n_seqs, n_seqs);

     auto * gf = graph_init();
     auto res = graph_build(ctx_compute.get(), gf, ubatch, LLM_GRAPH_TYPE_DEFAULT, mstate);
@@ -2027,12 +2027,12 @@ void llama_context::opt_epoch_iter(
             batch.logits[pos_batch] = true;
         }

-        if (!batch_allocr->init(batch, model.vocab, nullptr, model.hparams.n_embd, true)) {
+        if (!balloc->init(batch, model.vocab, nullptr, model.hparams.n_embd, true)) {
             LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
             return;
         }

-        const uint32_t n_tokens_all = batch_allocr->get_n_tokens();
+        const uint32_t n_tokens_all = balloc->get_n_tokens();

         n_queued_tokens += n_tokens_all;

@@ -2041,7 +2041,7 @@ void llama_context::opt_epoch_iter(
         uint32_t n_outputs_all = n_tokens_all;

         // TODO: fix
-        auto mstate = memory->init_batch(batch_allocr.get(), cparams.n_ubatch, true);
+        auto mstate = memory->init_batch(*balloc, cparams.n_ubatch, true);
         if (!mstate || mstate->get_status() != LLAMA_MEMORY_STATUS_SUCCESS) {
             LLAMA_LOG_ERROR("%s: could not initialize batch\n", __func__);
             break;