Skip to content

Commit b864fc0

Browse files
committed
fix: Vulkan parallel decoding
1 parent f084b2c commit b864fc0

File tree

1 file changed: 6 additions (+6), 6 deletions (−6)

src/evaluator/LlamaContext/LlamaContext.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -477,19 +477,13 @@ export class LlamaContext {
477477
? await allocationResult ?? []
478478
: allocationResult ?? [];
479479

480-
let decodeLock: Lock | undefined;
481-
// this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel
482-
if (this._llama.gpu === "vulkan")
483-
decodeLock = await acquireLock(decodeSyncWorkaround.vulkanLock, "decode");
484-
485480
try {
486481
if (threadsToUse != null)
487482
this._ctx.setThreads(threadsToUse);
488483

489484
await this._ctx.decodeBatch();
490485
consumerHandle?.dispose();
491486
} catch (err) {
492-
decodeLock?.dispose();
493487
consumerHandle?.dispose();
494488
this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
495489
return;
@@ -583,11 +577,17 @@ export class LlamaContext {
583577
return;
584578
}
585579

580+
let decodeLock: Lock | undefined;
581+
// this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel
582+
if (this._llama.gpu === "vulkan")
583+
decodeLock = await acquireLock(decodeSyncWorkaround.vulkanLock, "decode");
584+
586585
try {
587586
await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
588587

589588
shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
590589
} finally {
590+
decodeLock?.dispose();
591591
preventDisposalHandle.dispose();
592592
}
593593
}

0 commit comments

Comments (0)