fix: Vulkan parallel decoding

giladgd · giladgd · commit f084b2cf9653 · 2025-01-26T03:50:40.000+02:00
diff --git a/src/evaluator/LlamaContext/LlamaContext.ts b/src/evaluator/LlamaContext/LlamaContext.ts
@@ -1,4 +1,4 @@
-import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock} from "lifecycle-utils";
+import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, Lock, withLock} from "lifecycle-utils";
 import {removeNullFields} from "../../utils/removeNullFields.js";
 import {Token} from "../../types.js";
 import {AddonContext, AddonModelLora, BatchLogitIndex} from "../../bindings/AddonTypes.js";
@@ -32,6 +32,10 @@ const defaultFailedCreationRemedy = {
 } as const satisfies Required<LlamaContextOptions["failedCreationRemedy"]>;
 const defaultEvaluationPriority: EvaluationPriority = 5;
 
+const decodeSyncWorkaround = {
+    vulkanLock: {} // module-level lock scope; acquired with the "decode" key before `decodeBatch()` when the GPU type is "vulkan"
+};
+
 export class LlamaContext {
     /** @internal */ public readonly _llama: Llama;
     /** @internal */ public readonly _ctx: AddonContext;
@@ -473,13 +477,19 @@ export class LlamaContext {
                         ? await allocationResult ?? []
                         : allocationResult ?? [];
 
+                    let decodeLock: Lock | undefined;
+                    // this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel
+                    if (this._llama.gpu === "vulkan")
+                        decodeLock = await acquireLock(decodeSyncWorkaround.vulkanLock, "decode");
+
                     try {
                         if (threadsToUse != null)
                             this._ctx.setThreads(threadsToUse);
 
                         await this._ctx.decodeBatch();
                         consumerHandle?.dispose();
                     } catch (err) {
+                        decodeLock?.dispose();
                         consumerHandle?.dispose();
                         this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
                         return;