Skip to content

Commit f2555c2

Browse files
committed
context : pad the total context to a multiple of 256
1 parent 9163ef9 commit f2555c2

File tree

2 files changed

+5
-0
lines changed

2 files changed

+5
-0
lines changed

include/llama.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ extern "C" {
463463

464464
// NOTE: After creating a llama_context, it is recommended to query the actual values using these functions
465465
// In some cases the requested values via llama_context_params may differ from the actual values used by the context
466+
// ref: https://github.com/ggml-org/llama.cpp/pull/17046#discussion_r2503085732
466467
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
467468
LLAMA_API uint32_t llama_n_ctx_seq (const struct llama_context * ctx);
468469
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);

src/llama-context.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,14 @@ llama_context::llama_context(
114114
}
115115
}
116116

117+
// ref: https://github.com/ggml-org/llama.cpp/pull/17046#discussion_r2503085732
118+
cparams.n_ctx = GGML_PAD(cparams.n_ctx, 256);
119+
117120
if (cparams.kv_unified) {
118121
cparams.n_ctx_seq = cparams.n_ctx;
119122
} else {
120123
cparams.n_ctx_seq = cparams.n_ctx / cparams.n_seq_max;
124+
cparams.n_ctx_seq = GGML_PAD(cparams.n_ctx_seq, 256);
121125

122126
if (cparams.n_ctx_seq == 0) {
123127
throw std::runtime_error("n_ctx_seq == 0");

0 commit comments

Comments
 (0)