@@ -1742,115 +1742,6 @@ def llama_apply_adapter_cvec(
17421742# //
17431743
17441744
1745- # // Information associated with an individual cell in the KV cache view.
1746- # struct llama_kv_cache_view_cell {
1747- # // The position for this cell. Takes KV cache shifts into account.
1748- # // May be negative if the cell is not populated.
1749- # llama_pos pos;
1750- # };
class llama_kv_cache_view_cell(ctypes.Structure):
    """A single cell as exposed through the KV cache view.

    Attributes:
        pos (llama_pos): Position stored in this cell, with KV cache shifts
            already taken into account. May be negative if the cell is not
            populated.
    """

    # Layout mirrors `struct llama_kv_cache_view_cell` from the C header.
    _fields_ = [
        ("pos", llama_pos),
    ]

    # Static-analysis-only declaration of the field created by `_fields_`.
    if TYPE_CHECKING:
        pos: llama_pos
1762-
1763-
1764- # // An updateable view of the KV cache.
1765- # struct llama_kv_cache_view {
1766- # // Number of KV cache cells. This will be the same as the context size.
1767- # int32_t n_cells;
1768-
1769- # // Maximum number of sequences that can exist in a cell. It's not an error
1770- # // if there are more sequences in a cell than this value, however they will
1771- # // not be visible in the view cells_sequences.
1772- # int32_t n_seq_max;
1773-
1774- # // Number of tokens in the cache. For example, if there are two populated
1775- # // cells, the first with 1 sequence id in it and the second with 2 sequence
1776- # // ids then you'll have 3 tokens.
1777- # int32_t token_count;
1778-
1779- # // Number of populated cache cells.
1780- # int32_t used_cells;
1781-
1782- # // Maximum contiguous empty slots in the cache.
1783- # int32_t max_contiguous;
1784-
1785- # // Index to the start of the max_contiguous slot range. Can be negative
1786- # // when cache is full.
1787- # int32_t max_contiguous_idx;
1788-
1789- # // Information for an individual cell.
1790- # struct llama_kv_cache_view_cell * cells;
1791-
1792-
1793- # // The sequences for each cell. There will be n_seq_max items per cell.
1794- # llama_seq_id * cells_sequences;
1795- # };
class llama_kv_cache_view(ctypes.Structure):
    """An updateable view of the KV cache.

    Attributes:
        n_cells (int): Number of KV cache cells. This will be the same as the
            context size.
        n_max_seq (int): Maximum number of sequences that can exist in a cell.
            It is not an error if a cell holds more sequences than this value,
            but the extra ones are not visible in ``cells_sequences``.
            The C header names this member ``n_seq_max``; the ``n_seq_max``
            property below is an alias for it.
        token_count (int): Number of tokens in the cache. For example, two
            populated cells holding 1 and 2 sequence ids give 3 tokens.
        used_cells (int): Number of populated cache cells.
        max_contiguous (int): Maximum contiguous empty slots in the cache.
        max_contiguous_idx (int): Index of the start of the ``max_contiguous``
            slot range. Can be negative when the cache is full.
        cells: Pointer to the per-cell information.
        cells_sequences: Pointer to the sequence ids for each cell; there are
            ``n_max_seq`` items per cell.
    """

    if TYPE_CHECKING:
        n_cells: int
        n_max_seq: int
        token_count: int
        used_cells: int
        max_contiguous: int
        max_contiguous_idx: int
        cells: CtypesArray[llama_kv_cache_view_cell]
        cells_sequences: CtypesArray[llama_seq_id]

    # Field order and types must match `struct llama_kv_cache_view` exactly;
    # only the Python-side name of the second member differs from the header.
    _fields_ = [
        ("n_cells", ctypes.c_int32),
        ("n_max_seq", ctypes.c_int32),
        ("token_count", ctypes.c_int32),
        ("used_cells", ctypes.c_int32),
        ("max_contiguous", ctypes.c_int32),
        ("max_contiguous_idx", ctypes.c_int32),
        ("cells", ctypes.POINTER(llama_kv_cache_view_cell)),
        ("cells_sequences", ctypes.POINTER(llama_seq_id)),
    ]

    @property
    def n_seq_max(self) -> int:
        """Alias for ``n_max_seq`` using the member name from the C header."""
        return self.n_max_seq

    @n_seq_max.setter
    def n_seq_max(self, value: int) -> None:
        # Write through to the real structure field so both names stay in sync.
        self.n_max_seq = value
1817-
1818-
# ctypes pointer type for `struct llama_kv_cache_view *` arguments.
llama_kv_cache_view_p = ctypes.POINTER(llama_kv_cache_view)
1820-
1821-
1822- # // Create an empty KV cache view. (use only for debugging purposes)
1823- # LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max);
@ctypes_function(
    "llama_kv_cache_view_init",
    [
        llama_context_p_ctypes,  # ctx
        ctypes.c_int32,  # n_seq_max
    ],
    llama_kv_cache_view,
)
def llama_kv_cache_view_init(
    ctx: llama_context_p,
    n_seq_max: Union[ctypes.c_int32, int],
    /,
) -> llama_kv_cache_view:
    """Create an empty KV cache view. (use only for debugging purposes)

    Args:
        ctx: The llama context to create the view for.
        n_seq_max: Passed to the C function as its ``int32_t n_seq_max``
            argument.

    Returns:
        The ``llama_kv_cache_view`` structure, returned by value.
    """
    ...
1834-
1835-
1836- # // Free a KV cache view. (use only for debugging purposes)
1837- # LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
@ctypes_function("llama_kv_cache_view_free", [llama_kv_cache_view_p], None)
def llama_kv_cache_view_free(view: CtypesPointerOrRef[llama_kv_cache_view], /):  # type: ignore
    """Free a KV cache view. (use only for debugging purposes)

    Args:
        view: Pointer or reference to the ``llama_kv_cache_view`` to free
            (presumably one obtained from ``llama_kv_cache_view_init``).
    """
    # The annotation matches llama_kv_cache_view_update: `ctypes.pointer` is a
    # function rather than a type, so it cannot be used as a type expression.
    ...
1842-
1843-
1844- # // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
1845- # LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
@ctypes_function(
    "llama_kv_cache_view_update",
    [
        llama_context_p_ctypes,  # ctx
        llama_kv_cache_view_p,  # view
    ],
    None,
)
def llama_kv_cache_view_update(
    ctx: llama_context_p,
    view: CtypesPointerOrRef[llama_kv_cache_view],
    /,
):  # type: ignore
    """Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)

    Args:
        ctx: The llama context whose KV cache state is read.
        view: Pointer or reference to the ``llama_kv_cache_view`` to update.
    """
    ...
1852-
1853-
18541745# // Returns the number of tokens in the KV cache (slow, use only for debug)
18551746# // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
18561747# LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx);
0 commit comments