2 files changed: +6 -2 lines changed
src/evaluator/LlamaContext (2 files changed: +6 -2 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -403,7 +403,7 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
403403 }
404404
405405 if (options.Has (" batchSize" )) {
406- context_params.n_batch = options.Get (" batchSize" ).As <Napi::Number>().Uint32Value () + 1 ; // +1 to handle edge cases with SWA KV cache
406+ context_params.n_batch = options.Get (" batchSize" ).As <Napi::Number>().Uint32Value ();
407407 context_params.n_ubatch = context_params.n_batch ; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
408408 }
409409
Columns: original file line number | diff line number | diff line change
@@ -125,7 +125,11 @@ export class LlamaContext {
125125 this . _swaFullCache = ! ! swaFullCache ;
126126 this . _ctx = new this . _llama . _bindings . AddonContext ( this . _model . _model , removeNullFields ( {
127127 contextSize : this . _contextSize * this . _totalSequences , // each sequence needs its own <contextSize> of cells
128- batchSize : this . _batchSize ,
128+ batchSize : this . _batchSize + (
129+ ( ! this . _swaFullCache && this . model . fileInsights . swaSize != null && this . model . fileInsights . swaSize > 0 )
130+ ? 1 // +1 to handle edge cases with SWA KV cache
131+ : 0
132+ ) ,
129133 sequences : this . _totalSequences ,
130134 flashAttention : this . _flashAttention ,
131135 threads : this . _idealThreads ,
You can’t perform that action at this time.
0 commit comments