@@ -258,7 +258,9 @@ def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
# Copies the state to the specified destination address.
# Destination needs to have allocated enough memory.
# Returns the number of bytes copied
def llama_copy_state_data(
    ctx: llama_context_p,
    dest,  # type: Array[c_uint8]
) -> c_size_t:
    """Serialize the context state into the caller-allocated ``dest`` buffer.

    ``dest`` must already be large enough to hold the full state.
    Returns the number of bytes copied.
    """
    copied = _lib.llama_copy_state_data(ctx, dest)
    return copied
263265
264266
@@ -282,9 +284,9 @@ def llama_set_state_data(
def llama_load_session_file(
    ctx: llama_context_p,
    path_session: bytes,
    tokens_out,  # type: Array[llama_token]
    n_token_capacity: c_size_t,
    n_token_count_out,  # type: _Pointer[c_size_t]
) -> c_size_t:
    """Load a saved session from ``path_session`` into ``tokens_out``.

    ``tokens_out`` should hold at least ``n_token_capacity`` tokens;
    presumably the count actually loaded is written through
    ``n_token_count_out`` — confirm against the C header.  Returns the
    underlying C call's result unchanged.
    """
    args = (ctx, path_session, tokens_out, n_token_capacity, n_token_count_out)
    return _lib.llama_load_session_file(*args)
@@ -304,7 +306,7 @@ def llama_load_session_file(
def llama_save_session_file(
    ctx: llama_context_p,
    path_session: bytes,
    tokens,  # type: Array[llama_token]
    n_token_count: c_size_t,
) -> c_size_t:
    """Save the first ``n_token_count`` entries of ``tokens`` to ``path_session``.

    Thin pass-through to the C library; returns its result unchanged.
    """
    return _lib.llama_save_session_file(
        ctx,
        path_session,
        tokens,
        n_token_count,
    )
@@ -325,7 +327,7 @@ def llama_save_session_file(
325327# Returns 0 on success
326328def llama_eval (
327329 ctx : llama_context_p ,
328- tokens : Array [llama_token ],
330+ tokens , # type : Array[llama_token]
329331 n_tokens : c_int ,
330332 n_past : c_int ,
331333 n_threads : c_int ,
@@ -345,7 +347,7 @@ def llama_eval(
345347def llama_tokenize (
346348 ctx : llama_context_p ,
347349 text : bytes ,
348- tokens : Array [llama_token ],
350+ tokens , # type : Array[llama_token]
349351 n_max_tokens : c_int ,
350352 add_bos : c_bool ,
351353) -> c_int :
@@ -444,8 +446,8 @@ def llama_token_nl() -> llama_token:
444446# @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
445447def llama_sample_repetition_penalty (
446448 ctx : llama_context_p ,
447- candidates : _Pointer [llama_token_data_array ],
448- last_tokens_data : Array [llama_token ],
449+ candidates , # type : _Pointer[llama_token_data_array]
450+ last_tokens_data , # type : Array[llama_token]
449451 last_tokens_size : c_int ,
450452 penalty : c_float ,
451453):
@@ -467,8 +469,8 @@ def llama_sample_repetition_penalty(
467469# @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details.
468470def llama_sample_frequency_and_presence_penalties (
469471 ctx : llama_context_p ,
470- candidates : _Pointer [llama_token_data_array ],
471- last_tokens_data : Array [llama_token ],
472+ candidates , # type : _Pointer[llama_token_data_array]
473+ last_tokens_data , # type : Array[llama_token]
472474 last_tokens_size : c_int ,
473475 alpha_frequency : c_float ,
474476 alpha_presence : c_float ,
@@ -495,7 +497,9 @@ def llama_sample_frequency_and_presence_penalties(
495497
496498
497499# @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
498- def llama_sample_softmax (ctx : llama_context_p , candidates : _Pointer [llama_token_data ]):
500+ def llama_sample_softmax (
501+ ctx : llama_context_p , candidates # type: _Pointer[llama_token_data]
502+ ):
499503 return _lib .llama_sample_softmax (ctx , candidates )
500504
501505
@@ -509,7 +513,7 @@ def llama_sample_softmax(ctx: llama_context_p, candidates: _Pointer[llama_token_
509513# @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
510514def llama_sample_top_k (
511515 ctx : llama_context_p ,
512- candidates : _Pointer [llama_token_data_array ],
516+ candidates , # type : _Pointer[llama_token_data_array]
513517 k : c_int ,
514518 min_keep : c_size_t = c_size_t (1 ),
515519):
@@ -528,7 +532,7 @@ def llama_sample_top_k(
528532# @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
529533def llama_sample_top_p (
530534 ctx : llama_context_p ,
531- candidates : _Pointer [llama_token_data_array ],
535+ candidates , # type : _Pointer[llama_token_data_array]
532536 p : c_float ,
533537 min_keep : c_size_t = c_size_t (1 ),
534538):
@@ -547,7 +551,7 @@ def llama_sample_top_p(
547551# @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
548552def llama_sample_tail_free (
549553 ctx : llama_context_p ,
550- candidates : _Pointer [llama_token_data_array ],
554+ candidates , # type : _Pointer[llama_token_data_array]
551555 z : c_float ,
552556 min_keep : c_size_t = c_size_t (1 ),
553557):
@@ -566,7 +570,7 @@ def llama_sample_tail_free(
566570# @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
567571def llama_sample_typical (
568572 ctx : llama_context_p ,
569- candidates : _Pointer [llama_token_data_array ],
573+ candidates , # type : _Pointer[llama_token_data_array]
570574 p : c_float ,
571575 min_keep : c_size_t = c_size_t (1 ),
572576):
@@ -583,7 +587,9 @@ def llama_sample_typical(
583587
584588
585589def llama_sample_temperature (
586- ctx : llama_context_p , candidates : _Pointer [llama_token_data_array ], temp : c_float
590+ ctx : llama_context_p ,
591+ candidates , # type: _Pointer[llama_token_data_array]
592+ temp : c_float ,
587593):
588594 return _lib .llama_sample_temperature (ctx , candidates , temp )
589595
@@ -604,11 +610,11 @@ def llama_sample_temperature(
# @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal.
def llama_sample_token_mirostat(
    ctx: llama_context_p,
    candidates,  # type: _Pointer[llama_token_data_array]
    tau: c_float,
    eta: c_float,
    m: c_int,
    mu,  # type: _Pointer[c_float]
) -> llama_token:
    """Sample one token with Mirostat (v1).

    ``mu`` is a pointer the C algorithm updates in place (see @param note
    above); the chosen token id is returned.
    """
    chosen = _lib.llama_sample_token_mirostat(ctx, candidates, tau, eta, m, mu)
    return chosen
614620
@@ -631,10 +637,10 @@ def llama_sample_token_mirostat(
# @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal.
def llama_sample_token_mirostat_v2(
    ctx: llama_context_p,
    candidates,  # type: _Pointer[llama_token_data_array]
    tau: c_float,
    eta: c_float,
    mu,  # type: _Pointer[c_float]
) -> llama_token:
    """Sample one token with Mirostat v2 (no ``m`` parameter, unlike v1).

    ``mu`` is a pointer the C algorithm updates in place (see @param note
    above); the chosen token id is returned.
    """
    chosen = _lib.llama_sample_token_mirostat_v2(ctx, candidates, tau, eta, mu)
    return chosen
640646
@@ -651,7 +657,8 @@ def llama_sample_token_mirostat_v2(
651657
# @details Selects the token with the highest probability.
def llama_sample_token_greedy(
    ctx: llama_context_p,
    candidates,  # type: _Pointer[llama_token_data_array]
) -> llama_token:
    """Greedy sampling: return the token with the highest probability."""
    best = _lib.llama_sample_token_greedy(ctx, candidates)
    return best
657664
@@ -665,7 +672,8 @@ def llama_sample_token_greedy(
665672
# @details Randomly selects a token from the candidates based on their probabilities.
def llama_sample_token(
    ctx: llama_context_p,
    candidates,  # type: _Pointer[llama_token_data_array]
) -> llama_token:
    """Randomly sample one token from ``candidates``, weighted by probability."""
    sampled = _lib.llama_sample_token(ctx, candidates)
    return sampled
671679
0 commit comments