Skip to content

Commit f084b2c

Browse files
committed
fix: Vulkan parallel decoding
1 parent b9d91b3 commit f084b2c

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

src/evaluator/LlamaContext/LlamaContext.ts

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock} from "lifecycle-utils";
1+
import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, Lock, withLock} from "lifecycle-utils";
22
import {removeNullFields} from "../../utils/removeNullFields.js";
33
import {Token} from "../../types.js";
44
import {AddonContext, AddonModelLora, BatchLogitIndex} from "../../bindings/AddonTypes.js";
@@ -32,6 +32,10 @@ const defaultFailedCreationRemedy = {
3232
} as const satisfies Required<LlamaContextOptions["failedCreationRemedy"]>;
3333
const defaultEvaluationPriority: EvaluationPriority = 5;
3434

35+
const decodeSyncWorkaround = {
36+
vulkanLock: {}
37+
};
38+
3539
export class LlamaContext {
3640
/** @internal */ public readonly _llama: Llama;
3741
/** @internal */ public readonly _ctx: AddonContext;
@@ -473,13 +477,19 @@ export class LlamaContext {
473477
? await allocationResult ?? []
474478
: allocationResult ?? [];
475479

480+
let decodeLock: Lock | undefined;
481+
// this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel
482+
if (this._llama.gpu === "vulkan")
483+
decodeLock = await acquireLock(decodeSyncWorkaround.vulkanLock, "decode");
484+
476485
try {
477486
if (threadsToUse != null)
478487
this._ctx.setThreads(threadsToUse);
479488

480489
await this._ctx.decodeBatch();
481490
consumerHandle?.dispose();
482491
} catch (err) {
492+
decodeLock?.dispose();
483493
consumerHandle?.dispose();
484494
this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
485495
return;

0 commit comments

Comments
 (0)