fix: bugs (apply the +1 batch size padding only when the SWA KV cache is in use)

giladgd · giladgd · commit 8d85100bf190 · 2025-06-04T03:51:58.000+03:00
diff --git a/llama/addon/AddonContext.cpp b/llama/addon/AddonContext.cpp
@@ -403,7 +403,7 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
         }
 
         if (options.Has("batchSize")) {
-            context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value() + 1; // +1 to handle edge cases with SWA KV cache
+            context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
             context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
         }
 
diff --git a/src/evaluator/LlamaContext/LlamaContext.ts b/src/evaluator/LlamaContext/LlamaContext.ts
@@ -125,7 +125,11 @@ export class LlamaContext {
         this._swaFullCache = !!swaFullCache;
         this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
             contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
-            batchSize: this._batchSize,
+            batchSize: this._batchSize + (
+                (!this._swaFullCache && this.model.fileInsights.swaSize != null && this.model.fileInsights.swaSize > 0)
+                    ? 1 // +1 to handle edge cases with SWA KV cache
+                    : 0
+            ),
             sequences: this._totalSequences,
             flashAttention: this._flashAttention,
             threads: this._idealThreads,

Original file line number	Diff line number	Diff line change
`@@ -403,7 +403,7 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad`
`403`	`403`	`}`
`404`	`404`
`405`	`405`	`if (options.Has("batchSize")) {`
`406`		`- context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value() + 1; // +1 to handle edge cases with SWA KV cache`
	`406`	`+ context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();`
`407`	`407`	`context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side`
`408`	`408`	`}`
`409`	`409`