@@ -313,27 +313,27 @@ static SafeLLamaContextHandle()
313313 /// <summary>
314314 /// Defragment the KV cache. This will be applied:
315315 /// - lazily on next llama_decode()
316- /// - explicitly with llama_kv_cache_update ()
316+ /// - explicitly with llama_kv_self_update ()
317317 /// </summary>
318318 /// <param name="ctx">Context handle whose KV cache should be defragmented</param>
319319 /// <remarks>Nothing is returned; the native function is declared void.</remarks>
320320 [ DllImport ( NativeApi . libraryName , CallingConvention = CallingConvention . Cdecl ) ]
321- private static extern void llama_kv_cache_defrag ( SafeLLamaContextHandle ctx ) ;
321+ private static extern void llama_kv_self_defrag ( SafeLLamaContextHandle ctx ) ;
322322
323323 /// <summary>
324324 /// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
325325 /// </summary>
326326 /// <param name="ctx">Context handle whose pending KV cache updates are applied</param>
327327 [ DllImport ( NativeApi . libraryName , CallingConvention = CallingConvention . Cdecl ) ]
328- private static extern void llama_kv_cache_update ( SafeLLamaContextHandle ctx ) ;
328+ private static extern void llama_kv_self_update ( SafeLLamaContextHandle ctx ) ;
329329
/// <summary>
/// Check if the context supports KV cache shifting
/// </summary>
/// <param name="ctx">Context handle to query</param>
/// <returns>True when the native library reports that the KV cache of <paramref name="ctx"/> can be shifted</returns>
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
// The native function returns a 1-byte C `bool`. Without an explicit U1 marshalling hint the
// runtime treats the return value as a 4-byte Win32 BOOL and may read undefined upper bytes.
[return: MarshalAs(UnmanagedType.U1)]
private static extern bool llama_kv_self_can_shift(SafeLLamaContextHandle ctx);
337337
/// <summary>
/// Native binding that returns an LLamaPerfContextTimings value for the given context.
/// NOTE(review): presumably the performance timing counters collected for this context - confirm against llama.h
/// </summary>
/// <param name="ctx">Context handle to query</param>
338338 [ DllImport ( NativeApi . libraryName , CallingConvention = CallingConvention . Cdecl ) ]
339339 private static extern LLamaPerfContextTimings llama_perf_context ( SafeLLamaContextHandle ctx ) ;
@@ -386,6 +386,9 @@ static SafeLLamaContextHandle()
386386 /// <returns>A pointer to the first float in an embedding, length = ctx.EmbeddingSize</returns>
387387 [ DllImport ( NativeApi . libraryName , CallingConvention = CallingConvention . Cdecl ) ]
388388 private static extern unsafe float * llama_get_embeddings_ith ( SafeLLamaContextHandle ctx , int i ) ;
389+
/// <summary>
/// Native binding that returns an LLamaKvCacheNative value for the given context.
/// NOTE(review): presumably a reference to the context's own KV cache - confirm ownership and lifetime against llama.h
/// </summary>
/// <param name="ctx">Context handle to query</param>
390+ [ DllImport ( NativeApi . libraryName , CallingConvention = CallingConvention . Cdecl ) ]
391+ private static extern LLamaKvCacheNative llama_get_kv_self ( SafeLLamaContextHandle ctx ) ;
389392 #endregion
390393
391394 #region LoRA
@@ -751,25 +754,25 @@ public void ResetTimings()
/// <summary>
/// Whether the KV cache of this context supports shifting, as reported by the native library
/// </summary>
public bool KvCacheCanShift
{
    get { return llama_kv_self_can_shift(this); }
}
755758
/// <summary>
/// Apply any KV cache updates for this context (such as K-shifts, defragmentation, etc.)
/// </summary>
public void KvCacheUpdate() => llama_kv_self_update(this);
763766
/// <summary>
/// Request defragmentation of this context's KV cache. It will be applied:
/// - lazily on next llama_decode()
/// - explicitly with llama_kv_self_update ()
/// </summary>
public void KvCacheDefrag() => llama_kv_self_defrag(this);
774777
775778 /// <summary>
@@ -788,7 +791,7 @@ public LLamaKvCacheViewSafeHandle KvCacheGetDebugView(int maxSequences = 4)
788791 /// <returns></returns>
public int KvCacheCountCells() => NativeApi.llama_kv_self_used_cells(this);
793796
794797 /// <summary>
@@ -798,15 +801,15 @@ public int KvCacheCountCells()
798801 /// <returns></returns>
public int KvCacheCountTokens() => NativeApi.llama_kv_self_n_tokens(this);
803806
/// <summary>
/// Clear the KV cache of this context - both cell info is erased and KV data is zeroed
/// </summary>
public void KvCacheClear() => NativeApi.llama_kv_self_clear(this);
811814
812815 /// <summary>
@@ -817,7 +820,7 @@ public void KvCacheClear()
817820 /// <param name="p1"></param>
public void KvCacheRemove(LLamaSeqId seq, LLamaPos p0, LLamaPos p1)
    => NativeApi.llama_kv_self_seq_rm(this, seq, p0, p1);
822825
823826 /// <summary>
@@ -831,7 +834,7 @@ public void KvCacheRemove(LLamaSeqId seq, LLamaPos p0, LLamaPos p1)
831834 /// <param name="p1"></param>
public void KvCacheSequenceCopy(LLamaSeqId src, LLamaSeqId dest, LLamaPos p0, LLamaPos p1)
    => NativeApi.llama_kv_self_seq_cp(this, src, dest, p0, p1);
836839
837840 /// <summary>
@@ -840,7 +843,7 @@ public void KvCacheSequenceCopy(LLamaSeqId src, LLamaSeqId dest, LLamaPos p0, LL
840843 /// <param name="seq"></param>
public void KvCacheSequenceKeep(LLamaSeqId seq)
    => NativeApi.llama_kv_self_seq_keep(this, seq);
845848
846849 /// <summary>
@@ -854,7 +857,10 @@ public void KvCacheSequenceKeep(LLamaSeqId seq)
854857 /// <param name="delta"></param>
/// <exception cref="InvalidOperationException">Thrown when <see cref="KvCacheCanShift"/> is false</exception>
public void KvCacheSequenceAdd(LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int delta)
{
    // Only forward to the native call when the context reports that shifting is supported.
    var shiftSupported = KvCacheCanShift;
    if (shiftSupported)
    {
        NativeApi.llama_kv_self_seq_add(this, seq, p0, p1, delta);
        return;
    }

    throw new InvalidOperationException("Cannot shift KV cache (KvCacheCanShift=False)");
}
859865
860866 /// <summary>
@@ -869,7 +875,10 @@ public void KvCacheSequenceAdd(LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int del
869875 /// <param name="divisor"></param>
/// <exception cref="InvalidOperationException">Thrown when <see cref="KvCacheCanShift"/> is false</exception>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="divisor"/> is zero</exception>
public void KvCacheSequenceDivide(LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int divisor)
{
    // Same capability check as the other position-modifying operation; kept first so the
    // exception type for unsupported contexts is unchanged.
    if (!KvCacheCanShift)
        throw new InvalidOperationException("Cannot shift KV cache (KvCacheCanShift=False)");

    // The native side integer-divides positions by `divisor`. Reject zero here so the
    // failure is a catchable managed exception rather than a fault inside native code.
    if (divisor == 0)
        throw new ArgumentOutOfRangeException(nameof(divisor), divisor, "Divisor must not be zero");

    NativeApi.llama_kv_self_seq_div(this, seq, p0, p1, divisor);
}
874883
875884 /// <summary>
@@ -879,7 +888,7 @@ public void KvCacheSequenceDivide(LLamaSeqId seq, LLamaPos p0, LLamaPos p1, int
879888 /// <returns></returns>
public LLamaPos KvCacheMaxPosition(LLamaSeqId seq)
    => NativeApi.llama_kv_self_seq_pos_max(this, seq);
884893 #endregion
885894 }
0 commit comments