2 files changed: +8 −6 lines changed.
```diff
@@ -3941,7 +3941,7 @@ int main(int argc, char ** argv) {
     const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok](
             server_task_type type,
             json & data,
-            std::function<bool()> is_connection_closed,
+            const std::function<bool()> & is_connection_closed,
             httplib::Response & res,
             oaicompat_type oaicompat) {
         GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL);
```
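Taking the callback by `const` reference avoids copying the `std::function` (and any state it captures) every time the handler is invoked. Below is a minimal standalone sketch of the difference; the `copy_counter` callable is illustrative and not part of the server code:

```cpp
#include <cstdio>
#include <functional>

// Callable that counts how many times it is copied, to make the
// cost of pass-by-value visible.
struct copy_counter {
    int * copies;
    copy_counter(int * c) : copies(c) {}
    copy_counter(const copy_counter & other) : copies(other.copies) { ++*copies; }
    bool operator()() const { return false; }
};

// Pass-by-value: constructing the parameter copies the wrapped
// callable (and anything it captured) on every call.
static void by_value(std::function<bool()> cb)       { cb(); }

// Pass-by-const-reference: the caller's std::function is reused as-is.
static void by_ref(const std::function<bool()> & cb) { cb(); }

int main() {
    int copies = 0;
    std::function<bool()> cb = copy_counter(&copies);

    copies = 0;
    by_value(cb);
    std::printf("by_value: %d copy(ies)\n", copies); // at least 1

    copies = 0;
    by_ref(cb);
    std::printf("by_ref:   %d copy(ies)\n", copies); // 0

    return 0;
}
```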
```diff
@@ -699,6 +699,8 @@ int llama_context::encode(llama_batch & inp_batch) {
         t_compute_start_us = ggml_time_us();
     }
 
+    embd_seq.clear();
+
     n_queued_tokens += n_tokens;
 
     const int64_t n_embd = hparams.n_embd;
```
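The `embd_seq.clear()` guards against results from a previous batch surviving into the next call. A minimal sketch of that failure mode, assuming `embd_seq` behaves like a per-sequence output map that `encode()` fills in (the `context` type and its members here are illustrative, not llama.cpp's API):

```cpp
#include <cstdio>
#include <map>
#include <vector>

struct context {
    std::map<int, std::vector<float>> embd_seq;

    void encode(const std::vector<int> & seq_ids) {
        embd_seq.clear(); // the fix: drop results from the previous batch

        for (int id : seq_ids) {
            embd_seq[id] = {0.0f}; // stand-in for a computed embedding
        }
    }
};

int main() {
    context ctx;

    ctx.encode({0, 1, 2}); // first batch: sequences 0, 1, 2
    ctx.encode({0});       // second batch: only sequence 0

    // Without the clear(), embd_seq would still hold the stale
    // entries for sequences 1 and 2 from the first batch.
    std::printf("entries after second batch: %zu\n", ctx.embd_seq.size()); // 1

    return 0;
}
```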
```diff
@@ -839,13 +841,13 @@ int llama_context::encode(llama_batch & inp_batch) {
 }
 
 int llama_context::decode(llama_batch & inp_batch) {
-    if (inp_batch.n_tokens == 0) {
-        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
-        return -1;
-    }
-
     if (!memory) {
         LLAMA_LOG_WARN("%s: cannot decode batches with this context\n", __func__);
+        return encode(inp_batch);
+    }
+
+    if (inp_batch.n_tokens == 0) {
+        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
         return -1;
     }
 
```
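Reordering the guards lets a context without memory (for example, an encoder-only embedding model) delegate `decode()` to `encode()` instead of failing outright, and defers the empty-batch check to whichever path actually runs. A standalone sketch of the resulting control flow, with illustrative stand-in types rather than the real `llama_context`:

```cpp
#include <cstdio>

struct batch { int n_tokens = 0; };

struct ctx {
    bool has_memory = false; // stand-in for the `memory` member

    int encode(const batch & b) {
        std::printf("encode: %d token(s)\n", b.n_tokens);
        return 0;
    }

    int decode(const batch & b) {
        // New order: route memoryless contexts to encode() before
        // validating the batch, so encode() applies its own checks.
        if (!has_memory) {
            std::printf("decode: no memory, falling back to encode\n");
            return encode(b);
        }

        if (b.n_tokens == 0) {
            std::printf("decode: n_tokens == 0\n");
            return -1;
        }

        std::printf("decode: %d token(s)\n", b.n_tokens);
        return 0;
    }
};

int main() {
    ctx c;
    batch b;
    b.n_tokens = 4;

    c.decode(b); // no memory: delegated to encode()

    c.has_memory = true;
    c.decode(b); // normal decode path

    return 0;
}
```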