You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: llama-cpp-2/src/llama_batch.rs
+47 -5 Lines changed: 47 additions & 5 deletions
Original file line number
Diff line number
Diff line change
@@ -6,11 +6,11 @@ use llama_cpp_sys_2::{llama_batch, llama_batch_free, llama_batch_init, llama_pos
6
6
/// A safe wrapper around `llama_batch`.
7
7
#[derive(Debug)]
8
8
pub struct LlamaBatch {
9
-
/// The number of tokens the batch was allocated with. they are safe to write to - but not necessarily read from as they are not necessarily initilized
9
+
/// The number of tokens the batch was allocated with. they are safe to write to - but not necessarily read from as they are not necessarily initialized
10
10
allocated: usize,
11
-
/// The logits that are initilized. Used by [`LlamaContext`] to ensure that only initilized logits are accessed.
11
+
/// The logits that are initialized. Used by [`LlamaContext`] to ensure that only initialized logits are accessed.
12
12
pub(crate) initialized_logits: Vec<i32>,
13
-
/// The llama_cpp batch. always initilize by `llama_cpp_sys_2::llama_batch_init(allocated, <unknown>, <unknown>)`
13
+
/// The llama_cpp batch. always initialize by `llama_cpp_sys_2::llama_batch_init(allocated, <unknown>, <unknown>)`
14
14
pub(crate) llama_batch: llama_batch,
15
15
}
16
16
@@ -31,7 +31,7 @@ impl LlamaBatch {
31
31
}
32
32
33
33
/// add a token to the batch for sequences [`seq_ids`] at position [pos]. If [logits] is true, the
34
-
/// token will be initilized and can be read from after the next decode.
34
+
/// token will be initialized and can be read from after the next decode.
35
35
///
36
36
/// # Panics
37
37
///
@@ -90,7 +90,49 @@ impl LlamaBatch {
90
90
91
91
Ok(())
92
92
}
93
-
/// Create a new `LlamaBatch` that cab contain up to `n_tokens` tokens.
93
+
94
+
/// Add a sequence of tokens to the batch for the given sequence id. If [logits_all] is true, the
95
+
/// tokens will be initialized and can be read from after the next decode.
96
+
///
97
+
/// Either way the last token in the sequence will have its logits set to `true`.
98
+
///
99
+
/// # Errors
100
+
///
101
+
/// Returns an error if there is insufficient space in the buffer
0 commit comments