@@ -1055,7 +1055,9 @@ export class LlamaContextSequence {
10551055 *
10561056 * This index can be greater than `0` only when SWA (Sliding Window Attention) is used (only on supported models).
10571057 *
1058- * When SWA is used, this index will usually be `Math.max(0, .nextTokenIndex - .model.fileInsights.swaSize)` or larger.
1058+ * When SWA is used, this index will usually be `Math.max(-1, .nextTokenIndex - .model.fileInsights.swaSize)` or larger.
1059+ *
1060+ * When the KV cache is empty, this index will be `-1`.
10591061 *
10601062 * You can disable SWA by setting the `swaFullCache` option to `true` when creating a context.
10611063 */
@@ -1207,6 +1209,8 @@ export class LlamaContextSequence {
12071209 ) {
12081210 this . _ensureNotDisposed ( ) ;
12091211
1212+ let awaitPromise : Promise < void > | undefined ;
1213+
12101214 await withLock ( this . _context , "context" , async ( ) => {
12111215 this . _ensureNotDisposed ( ) ;
12121216
@@ -1250,7 +1254,7 @@ export class LlamaContextSequence {
12501254
12511255 const minKvCachePosition = ( this . _contextTokens . length === 0 && this . _loadedTokenPredictions . length === 0 )
12521256 ? 0
1253- : this . _context . _ctx . getSequenceKvCacheMinPosition ( this . _sequenceId ) ;
1257+ : Math . max ( 0 , this . _context . _ctx . getSequenceKvCacheMinPosition ( this . _sequenceId ) ) ;
12541258 if ( resolvedRanges [ 0 ] != null && resolvedRanges [ 0 ] . start <= minKvCachePosition )
12551259 // the first resolved range starts at or before the lowest position still available in the KV cache, so we have to drop the cache and reevaluate the sequence
12561260 deletionSuccessful = false ;
@@ -1310,8 +1314,12 @@ export class LlamaContextSequence {
13101314 this . _nextTokenIndex = 0 ;
13111315 this . _context . _ctx . disposeSequence ( this . _sequenceId ) ;
13121316
1313- await this . evaluateWithoutGeneratingNewTokens ( newSequenceTokens , { _skipLock : skipLock } ) ;
1317+ // start the evaluation here, but await it only after the "context" lock callback has returned, to avoid deadlocks
1318+ awaitPromise = this . evaluateWithoutGeneratingNewTokens ( newSequenceTokens , { _skipLock : skipLock } ) ;
13141319 } ) ;
1320+
1321+ if ( awaitPromise != null )
1322+ await awaitPromise ;
13151323 }
13161324
13171325 /**
0 commit comments