@@ -77,6 +77,7 @@ class llama_context_params(Structure):
             c_bool,
         ),  # the llama_eval() call computes all logits, not just the last one
         ("vocab_only", c_bool),  # only load the vocabulary, no weights
+        ("use_mmap", c_bool),  # use mmap if possible
         ("use_mlock", c_bool),  # force system to keep model in RAM
         ("embedding", c_bool),  # embedding mode only
         # called with a progress value between 0 and 1, pass NULL to disable
@@ -99,6 +100,17 @@ def llama_context_default_params() -> llama_context_params:
 _lib.llama_context_default_params.argtypes = []
 _lib.llama_context_default_params.restype = llama_context_params
 
+def llama_mmap_supported() -> c_bool:
+    return _lib.llama_mmap_supported()
+
+_lib.llama_mmap_supported.argtypes = []
+_lib.llama_mmap_supported.restype = c_bool
+
+def llama_mlock_supported() -> c_bool:
+    return _lib.llama_mlock_supported()
+
+_lib.llama_mlock_supported.argtypes = []
+_lib.llama_mlock_supported.restype = c_bool
 
 # Various functions for loading a ggml llama model.
 # Allocate (almost) all memory needed for the model.
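
For orientation, here is a minimal usage sketch (not part of the patch itself) showing how the added field and capability checks fit together; it assumes the bindings above are in scope and that the loaded shared library exports both symbols:

    # Hypothetical usage: gate the new context-params flags on the
    # capability checks so unsupported builds fall back gracefully.
    params = llama_context_default_params()
    if llama_mmap_supported():
        params.use_mmap = True   # map the weights instead of copying them into RAM
    if llama_mlock_supported():
        params.use_mlock = True  # keep the mapped weights resident in memory

Since `restype` is `c_bool`, ctypes converts the C return value to a Python bool, so the results can be used directly in the `if` tests.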