|
1 | | -import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock} from "lifecycle-utils"; |
| 1 | +import {acquireLock, AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, Lock, withLock} from "lifecycle-utils"; |
2 | 2 | import {removeNullFields} from "../../utils/removeNullFields.js"; |
3 | 3 | import {Token} from "../../types.js"; |
4 | 4 | import {AddonContext, AddonModelLora, BatchLogitIndex} from "../../bindings/AddonTypes.js"; |
@@ -32,6 +32,10 @@ const defaultFailedCreationRemedy = { |
32 | 32 | } as const satisfies Required<LlamaContextOptions["failedCreationRemedy"]>; |
33 | 33 | const defaultEvaluationPriority: EvaluationPriority = 5; |
34 | 34 |
|
// Holder for the scope objects used by the decode synchronization workaround.
// `vulkanLock` is the scope passed to `acquireLock(..., "decode")` before a batch is decoded
// when `this._llama.gpu === "vulkan"`, to keep Vulkan from crashing the process when several
// contexts decode in parallel.
// NOTE(review): declared at module level, so presumably shared by every LlamaContext created
// from this module — confirm.
| 35 | +const decodeSyncWorkaround = { |
| 36 | + vulkanLock: {} |
| 37 | +}; |
| 38 | + |
35 | 39 | export class LlamaContext { |
36 | 40 | /** @internal */ public readonly _llama: Llama; |
37 | 41 | /** @internal */ public readonly _ctx: AddonContext; |
@@ -473,13 +477,19 @@ export class LlamaContext { |
473 | 477 | ? await allocationResult ?? [] |
474 | 478 | : allocationResult ?? []; |
475 | 479 |
|
| 480 | + let decodeLock: Lock | undefined; |
| 481 | + // this is a workaround to prevent Vulkan from crashing the process when decoding on multiple contexts in parallel |
| 482 | + if (this._llama.gpu === "vulkan") |
| 483 | + decodeLock = await acquireLock(decodeSyncWorkaround.vulkanLock, "decode"); |
| 484 | + |
476 | 485 | try { |
477 | 486 | if (threadsToUse != null) |
478 | 487 | this._ctx.setThreads(threadsToUse); |
479 | 488 |
|
480 | 489 | await this._ctx.decodeBatch(); |
481 | 490 | consumerHandle?.dispose(); |
482 | 491 | } catch (err) { |
| 492 | + decodeLock?.dispose(); |
483 | 493 | consumerHandle?.dispose(); |
484 | 494 | this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err); |
485 | 495 | return; |
|
0 commit comments