@@ -2271,6 +2271,20 @@ def llama_token_eos(model: llama_model_p, /) -> int:
22712271 ...
22722272
22732273
# LLAMA_API llama_token llama_token_cls(const struct llama_model * model); // classification
@ctypes_function("llama_token_cls", [llama_model_p_ctypes], llama_token)
def llama_token_cls(model: llama_model_p, /) -> int:
    """Return the classification (CLS) token id of the model's vocabulary."""
    ...

2280+
# LLAMA_API llama_token llama_token_sep(const struct llama_model * model); // sentence separator
@ctypes_function("llama_token_sep", [llama_model_p_ctypes], llama_token)
def llama_token_sep(model: llama_model_p, /) -> int:
    """Return the sentence-separator (SEP) token id of the model's vocabulary."""
    ...


22742288# LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
22752289@ctypes_function ("llama_token_nl" , [llama_model_p_ctypes ], llama_token )
22762290def llama_token_nl (model : llama_model_p , / ) -> int :
@@ -2326,16 +2340,16 @@ def llama_token_eot(model: llama_model_p, /) -> int: ...
23262340# /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
23272341# /// @return Returns the number of tokens on success, no more than n_tokens_max
23282342# /// @return Returns a negative number on failure - the number of tokens that would have been returned
2329- # /// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
2330- # /// Does not insert a leading space.
2343+ # /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
2344+ # /// as plaintext. Does not insert a leading space.
23312345# LLAMA_API int32_t llama_tokenize(
23322346# const struct llama_model * model,
23332347# const char * text,
23342348# int32_t text_len,
23352349# llama_token * tokens,
23362350# int32_t n_tokens_max,
2337- # bool add_bos ,
2338- # bool special );
2351+ # bool add_special ,
2352+ # bool parse_special );
23392353@ctypes_function (
23402354 "llama_tokenize" ,
23412355 [
@@ -2355,8 +2369,8 @@ def llama_tokenize(
23552369 text_len : Union [ctypes .c_int , int ],
23562370 tokens : CtypesArray [llama_token ],
23572371 n_tokens_max : Union [ctypes .c_int , int ],
2358- add_bos : Union [ctypes .c_bool , bool ],
2359- special : Union [ctypes .c_bool , bool ],
2372+ add_special : Union [ctypes .c_bool , bool ],
2373+ parse_special : Union [ctypes .c_bool , bool ],
23602374 / ,
23612375) -> int :
23622376 """Convert the provided text into tokens.
@@ -2367,9 +2381,8 @@ def llama_tokenize(
23672381 text_len: The length of the text.
23682382 tokens: The tokens pointer must be large enough to hold the resulting tokens.
23692383 n_max_tokens: The maximum number of tokens to return.
2370- add_bos: Whether to add a beginning-of-sentence token.
2371- special: Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
2372- Does not insert a leading space.
2384+        add_special: Allow adding special tokens (e.g. BOS/EOS) if the model is configured to do so.
2385+        parse_special: Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
23732386
23742387 Returns:
23752388 Returns the number of tokens on success, no more than n_tokens_max
0 commit comments