Skip to content

Commit 8d85100

Browse files
committed
fix: apply the SWA KV-cache batch-size workaround conditionally — remove the unconditional `+1` on `n_batch` from the C++ addon and add it on the TS side only when SWA is active
1 parent 2a0c539 commit 8d85100

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

llama/addon/AddonContext.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
403403
}
404404

405405
if (options.Has("batchSize")) {
406-
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value() + 1; // +1 to handle edge cases with SWA KV cache
406+
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
407407
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
408408
}
409409

src/evaluator/LlamaContext/LlamaContext.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,11 @@ export class LlamaContext {
125125
this._swaFullCache = !!swaFullCache;
126126
this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
127127
contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
128-
batchSize: this._batchSize,
128+
batchSize: this._batchSize + (
129+
(!this._swaFullCache && this.model.fileInsights.swaSize != null && this.model.fileInsights.swaSize > 0)
130+
? 1 // +1 to handle edge cases with SWA KV cache
131+
: 0
132+
),
129133
sequences: this._totalSequences,
130134
flashAttention: this._flashAttention,
131135
threads: this._idealThreads,

0 commit comments

Comments
 (0)