Skip to content

Commit 3e28e0e

Browse files
committed
Fix: runtime type errors
1 parent e24c3d7 commit 3e28e0e

File tree

1 file changed

+30
-22
lines changed

1 file changed

+30
-22
lines changed

llama_cpp/llama_cpp.py

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,9 @@ def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
258258
# Copies the state to the specified destination address.
259259
# Destination needs to have allocated enough memory.
260260
# Returns the number of bytes copied
261-
def llama_copy_state_data(ctx: llama_context_p, dest: Array[c_uint8]) -> c_size_t:
261+
def llama_copy_state_data(
262+
ctx: llama_context_p, dest # type: Array[c_uint8]
263+
) -> c_size_t:
262264
return _lib.llama_copy_state_data(ctx, dest)
263265

264266

@@ -282,9 +284,9 @@ def llama_set_state_data(
282284
def llama_load_session_file(
283285
ctx: llama_context_p,
284286
path_session: bytes,
285-
tokens_out: Array[llama_token],
287+
tokens_out, # type: Array[llama_token]
286288
n_token_capacity: c_size_t,
287-
n_token_count_out: _Pointer[c_size_t],
289+
n_token_count_out, # type: _Pointer[c_size_t]
288290
) -> c_size_t:
289291
return _lib.llama_load_session_file(
290292
ctx, path_session, tokens_out, n_token_capacity, n_token_count_out
@@ -304,7 +306,7 @@ def llama_load_session_file(
304306
def llama_save_session_file(
305307
ctx: llama_context_p,
306308
path_session: bytes,
307-
tokens: Array[llama_token],
309+
tokens, # type: Array[llama_token]
308310
n_token_count: c_size_t,
309311
) -> c_size_t:
310312
return _lib.llama_save_session_file(ctx, path_session, tokens, n_token_count)
@@ -325,7 +327,7 @@ def llama_save_session_file(
325327
# Returns 0 on success
326328
def llama_eval(
327329
ctx: llama_context_p,
328-
tokens: Array[llama_token],
330+
tokens, # type: Array[llama_token]
329331
n_tokens: c_int,
330332
n_past: c_int,
331333
n_threads: c_int,
@@ -345,7 +347,7 @@ def llama_eval(
345347
def llama_tokenize(
346348
ctx: llama_context_p,
347349
text: bytes,
348-
tokens: Array[llama_token],
350+
tokens, # type: Array[llama_token]
349351
n_max_tokens: c_int,
350352
add_bos: c_bool,
351353
) -> c_int:
@@ -444,8 +446,8 @@ def llama_token_nl() -> llama_token:
444446
# @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
445447
def llama_sample_repetition_penalty(
446448
ctx: llama_context_p,
447-
candidates: _Pointer[llama_token_data_array],
448-
last_tokens_data: Array[llama_token],
449+
candidates, # type: _Pointer[llama_token_data_array]
450+
last_tokens_data, # type: Array[llama_token]
449451
last_tokens_size: c_int,
450452
penalty: c_float,
451453
):
@@ -467,8 +469,8 @@ def llama_sample_repetition_penalty(
467469
# @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details.
468470
def llama_sample_frequency_and_presence_penalties(
469471
ctx: llama_context_p,
470-
candidates: _Pointer[llama_token_data_array],
471-
last_tokens_data: Array[llama_token],
472+
candidates, # type: _Pointer[llama_token_data_array]
473+
last_tokens_data, # type: Array[llama_token]
472474
last_tokens_size: c_int,
473475
alpha_frequency: c_float,
474476
alpha_presence: c_float,
@@ -495,7 +497,9 @@ def llama_sample_frequency_and_presence_penalties(
495497

496498

497499
# @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
498-
def llama_sample_softmax(ctx: llama_context_p, candidates: _Pointer[llama_token_data]):
500+
def llama_sample_softmax(
501+
ctx: llama_context_p, candidates # type: _Pointer[llama_token_data]
502+
):
499503
return _lib.llama_sample_softmax(ctx, candidates)
500504

501505

@@ -509,7 +513,7 @@ def llama_sample_softmax(ctx: llama_context_p, candidates: _Pointer[llama_token_
509513
# @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
510514
def llama_sample_top_k(
511515
ctx: llama_context_p,
512-
candidates: _Pointer[llama_token_data_array],
516+
candidates, # type: _Pointer[llama_token_data_array]
513517
k: c_int,
514518
min_keep: c_size_t = c_size_t(1),
515519
):
@@ -528,7 +532,7 @@ def llama_sample_top_k(
528532
# @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
529533
def llama_sample_top_p(
530534
ctx: llama_context_p,
531-
candidates: _Pointer[llama_token_data_array],
535+
candidates, # type: _Pointer[llama_token_data_array]
532536
p: c_float,
533537
min_keep: c_size_t = c_size_t(1),
534538
):
@@ -547,7 +551,7 @@ def llama_sample_top_p(
547551
# @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
548552
def llama_sample_tail_free(
549553
ctx: llama_context_p,
550-
candidates: _Pointer[llama_token_data_array],
554+
candidates, # type: _Pointer[llama_token_data_array]
551555
z: c_float,
552556
min_keep: c_size_t = c_size_t(1),
553557
):
@@ -566,7 +570,7 @@ def llama_sample_tail_free(
566570
# @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
567571
def llama_sample_typical(
568572
ctx: llama_context_p,
569-
candidates: _Pointer[llama_token_data_array],
573+
candidates, # type: _Pointer[llama_token_data_array]
570574
p: c_float,
571575
min_keep: c_size_t = c_size_t(1),
572576
):
@@ -583,7 +587,9 @@ def llama_sample_typical(
583587

584588

585589
def llama_sample_temperature(
586-
ctx: llama_context_p, candidates: _Pointer[llama_token_data_array], temp: c_float
590+
ctx: llama_context_p,
591+
candidates, # type: _Pointer[llama_token_data_array]
592+
temp: c_float,
587593
):
588594
return _lib.llama_sample_temperature(ctx, candidates, temp)
589595

@@ -604,11 +610,11 @@ def llama_sample_temperature(
604610
# @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal.
605611
def llama_sample_token_mirostat(
606612
ctx: llama_context_p,
607-
candidates: _Pointer[llama_token_data_array],
613+
candidates, # type: _Pointer[llama_token_data_array]
608614
tau: c_float,
609615
eta: c_float,
610616
m: c_int,
611-
mu: _Pointer[c_float],
617+
mu, # type: _Pointer[c_float]
612618
) -> llama_token:
613619
return _lib.llama_sample_token_mirostat(ctx, candidates, tau, eta, m, mu)
614620

@@ -631,10 +637,10 @@ def llama_sample_token_mirostat(
631637
# @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal.
632638
def llama_sample_token_mirostat_v2(
633639
ctx: llama_context_p,
634-
candidates: _Pointer[llama_token_data_array],
640+
candidates, # type: _Pointer[llama_token_data_array]
635641
tau: c_float,
636642
eta: c_float,
637-
mu: _Pointer[c_float],
643+
mu, # type: _Pointer[c_float]
638644
) -> llama_token:
639645
return _lib.llama_sample_token_mirostat_v2(ctx, candidates, tau, eta, mu)
640646

@@ -651,7 +657,8 @@ def llama_sample_token_mirostat_v2(
651657

652658
# @details Selects the token with the highest probability.
653659
def llama_sample_token_greedy(
654-
ctx: llama_context_p, candidates: _Pointer[llama_token_data_array]
660+
ctx: llama_context_p,
661+
candidates, # type: _Pointer[llama_token_data_array]
655662
) -> llama_token:
656663
return _lib.llama_sample_token_greedy(ctx, candidates)
657664

@@ -665,7 +672,8 @@ def llama_sample_token_greedy(
665672

666673
# @details Randomly selects a token from the candidates based on their probabilities.
667674
def llama_sample_token(
668-
ctx: llama_context_p, candidates: _Pointer[llama_token_data_array]
675+
ctx: llama_context_p,
676+
candidates, # type: _Pointer[llama_token_data_array]
669677
) -> llama_token:
670678
return _lib.llama_sample_token(ctx, candidates)
671679

0 commit comments

Comments (0)