Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions examples/simple-chat/simple-chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,15 @@ int main(int argc, char ** argv) {
// check if we have enough space in the context to evaluate this batch
int n_ctx = llama_n_ctx(ctx);
int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0);
if (n_ctx_used + batch.n_tokens > n_ctx) {
if (n_ctx_used + batch.n_tokens >= n_ctx) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

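To be more precise, I think it would be better to add 1 to the value returned by `llama_memory_seq_pos_max`: it returns the largest occupied position (0-based, or -1 when the sequence is empty), not the number of tokens in use, so the used-context count is that value plus one: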

int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0) + 1; 

printf("\033[0m\n");
fprintf(stderr, "context size exceeded\n");
exit(0);
}

if (llama_decode(ctx, batch)) {
GGML_ABORT("failed to decode\n");
int ret = llama_decode(ctx, batch);
if (ret != 0) {
GGML_ABORT("failed to decode, ret = %d\n", ret);
}

// sample the next token
Expand Down
Loading