@@ -740,7 +740,7 @@ int llama_context::encode(const llama_batch & batch_inp) {
     const int64_t n_embd = hparams.n_embd;

     // note: during encode, we always pass the full sequence starting from pos = 0
-    if (!balloc->init(batch_inp, model.vocab, nullptr, n_embd, true)) {
+    if (!balloc->init(batch_inp, model.vocab, nullptr, n_embd, cparams.n_seq_max, true)) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
         return -1;
     }
@@ -907,7 +907,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
     // when computing embeddings, all tokens are output
     const bool output_all = cparams.embeddings;

-    if (!balloc->init(batch_inp, vocab, memory.get(), n_embd, output_all)) {
+    if (!balloc->init(batch_inp, vocab, memory.get(), n_embd, cparams.n_seq_max, output_all)) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
         return -1;
     }
@@ -2036,7 +2036,7 @@ void llama_context::opt_epoch_iter(
         batch.logits[pos_batch] = true;
     }

-    if (!balloc->init(batch, model.vocab, nullptr, model.hparams.n_embd, true)) {
+    if (!balloc->init(batch, model.vocab, nullptr, model.hparams.n_embd, cparams.n_seq_max, true)) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
         return;
     }
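
All three call sites thread cparams.n_seq_max into llama_batch_allocr::init, presumably so the allocator can reject batches whose seq_id values exceed the context's configured sequence limit up front, instead of failing later inside the memory/graph code. Below is a minimal sketch of that kind of bound check, using a hypothetical validate_seq_ids helper and a stub batch type; it is an illustration of the idea, not the actual llama.cpp implementation:

#include <cstdint>
#include <cstdio>

// Stub mirroring the llama_batch fields relevant to sequence validation
// (assumed shape, for illustration only).
struct batch_stub {
    int32_t    n_tokens;
    int32_t  * n_seq_id; // number of sequence ids attached to each token
    int32_t ** seq_id;   // per-token list of sequence ids
};

// Hypothetical helper: the kind of check that passing n_seq_max into
// init() makes possible.
static bool validate_seq_ids(const batch_stub & batch, uint32_t n_seq_max) {
    for (int32_t i = 0; i < batch.n_tokens; ++i) {
        for (int32_t s = 0; s < batch.n_seq_id[i]; ++s) {
            const int32_t seq = batch.seq_id[i][s];
            if (seq < 0 || (uint32_t) seq >= n_seq_max) {
                fprintf(stderr, "%s: invalid seq_id %d (n_seq_max = %u)\n",
                        __func__, (int) seq, (unsigned) n_seq_max);
                return false; // caller then logs "failed to initialize batch" and bails out
            }
        }
    }
    return true;
}

int main() {
    int32_t ids0[] = { 0 };
    int32_t ids1[] = { 5 }; // out of range when n_seq_max == 4

    int32_t   n_seq_id[] = { 1, 1 };
    int32_t * seq_id[]   = { ids0, ids1 };

    const batch_stub batch = { 2, n_seq_id, seq_id };
    return validate_seq_ids(batch, 4) ? 0 : 1; // fails on seq_id 5
}

Validating against n_seq_max at batch-init time keeps the error message tied to the offending input, which is cheaper to diagnose than a failure surfacing deeper in decode.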