Commit 153bd17

Merge pull request #479 from utilityai/update-llama-cpp-2024-08-30
Updated llama-cpp (bot)
2 parents 5c1468a + 1f77eb0 commit 153bd17

8 files changed: +29 additions, -23 deletions

embeddings/src/main.rs

Lines changed: 1 addition & 1 deletion

@@ -106,7 +106,7 @@ fn main() -> Result<()> {
 
     // initialize the context
     let ctx_params = LlamaContextParams::default()
-        .with_n_threads_batch(std::thread::available_parallelism()?.get() as u32)
+        .with_n_threads_batch(std::thread::available_parallelism()?.get().try_into()?)
         .with_embeddings(true);
 
     let mut ctx = model
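
Note on the change above: `with_n_threads_batch` now takes an `i32` (matching the type of the underlying `context_params` field, see the params.rs diff below), while `available_parallelism()` yields a `usize`, so the example switches from a lossy `as u32` cast to a fallible `try_into()?`. A minimal standalone sketch of that conversion pattern; the `detect_threads` helper is illustrative, not part of the crate:

    // Convert the detected parallelism to the i32 that the context params expect,
    // surfacing an out-of-range value as an error instead of silently wrapping.
    fn detect_threads() -> Result<i32, Box<dyn std::error::Error>> {
        let n: i32 = std::thread::available_parallelism()?.get().try_into()?;
        Ok(n)
    }

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        println!("batch threads: {}", detect_threads()?);
        Ok(())
    }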

examples/simple/src/main.rs

Lines changed: 2 additions & 2 deletions

@@ -54,12 +54,12 @@ struct Args {
         long,
         help = "number of threads to use during generation (default: use all available threads)"
     )]
-    threads: Option<u32>,
+    threads: Option<i32>,
     #[arg(
         long,
         help = "number of threads to use during batch and prompt processing (default: use all available threads)"
     )]
-    threads_batch: Option<u32>,
+    threads_batch: Option<i32>,
     #[arg(
         short = 'c',
         long,
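
With the flags parsed as `Option<i32>`, the values can be handed to `with_n_threads` / `with_n_threads_batch` (which now take `i32`, per the params.rs change below) without a cast. A sketch of how such optional flags might be applied; the `apply_threads` helper is hypothetical:

    use llama_cpp_2::context::params::LlamaContextParams;

    // Apply optional CLI thread counts; when a flag is absent, keep the
    // library's default value.
    fn apply_threads(
        mut params: LlamaContextParams,
        threads: Option<i32>,
        threads_batch: Option<i32>,
    ) -> LlamaContextParams {
        if let Some(n) = threads {
            params = params.with_n_threads(n);
        }
        if let Some(n) = threads_batch {
            params = params.with_n_threads_batch(n);
        }
        params
    }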

examples/usage/src/main.rs

Lines changed: 13 additions & 8 deletions

@@ -1,9 +1,13 @@
-/*
-git clone --recursive https://github.com/utilityai/llama-cpp-rs
-cd llama-cpp-rs/examples/usage
-wget https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GGUF/resolve/main/qwen2-1_5b-instruct-q4_0.gguf
-cargo run qwen2-1_5b-instruct-q4_0.gguf
-*/
+//! # Usage
+//!
+//! This is just about the smallest possible way to do inference. To fetch a model from hugging face:
+//!
+//! ```bash
+//! git clone --recursive https://github.com/utilityai/llama-cpp-rs
+//! cd llama-cpp-rs/examples/usage
+//! wget https://huggingface.co/Qwen/Qwen2-1.5B-Instruct-GGUF/resolve/main/qwen2-1_5b-instruct-q4_0.gguf
+//! cargo run --bin usage -- qwen2-1_5b-instruct-q4_0.gguf
+//! ```
 use std::io::Write;
 use llama_cpp_2::context::params::LlamaContextParams;
 use llama_cpp_2::llama_backend::LlamaBackend;
@@ -13,6 +17,7 @@ use llama_cpp_2::model::LlamaModel;
 use llama_cpp_2::model::{AddBos, Special};
 use llama_cpp_2::token::data_array::LlamaTokenDataArray;
 
+#[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
 fn main() {
     let model_path = std::env::args().nth(1).expect("Please specify model path");
     let backend = LlamaBackend::init().unwrap();
@@ -28,14 +33,14 @@ fn main() {
         .expect("unable to create the llama_context");
     let tokens_list = model
         .str_to_token(&prompt, AddBos::Always)
-        .expect(&format!("failed to tokenize {prompt}"));
+        .unwrap_or_else(|_| panic!("failed to tokenize {prompt}"));
     let n_len = 64;
 
     // create a llama_batch with size 512
     // we use this object to submit token data for decoding
     let mut batch = LlamaBatch::new(512, 1);
 
-    let last_index: i32 = (tokens_list.len() - 1) as i32;
+    let last_index = tokens_list.len() as i32 - 1;
     for (i, token) in (0_i32..).zip(tokens_list.into_iter()) {
         // llama_decode will output logits only for the last token of the prompt
         let is_last = i == last_index;
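
Besides converting the header comment into rustdoc, the diff replaces `.expect(&format!(...))` with `.unwrap_or_else(|_| panic!(...))`, so the panic message is only built on the error path, and computes `last_index` as `len() as i32 - 1`, which cannot underflow the way `(len() - 1) as i32` can on an empty token list. A small standalone sketch of both patterns, using a stand-in for the tokenizer call:

    fn main() {
        let prompt = "Hello world";
        // Stand-in for model.str_to_token(&prompt, AddBos::Always): a fallible call.
        let tokens: Vec<i32> = prompt
            .split_whitespace()
            .map(|w| i32::try_from(w.len()))
            .collect::<Result<_, _>>()
            // unwrap_or_else formats the panic message lazily, unlike
            // .expect(&format!(...)), which allocates it even on success.
            .unwrap_or_else(|_| panic!("failed to tokenize {prompt}"));

        // `len() as i32 - 1` avoids the usize underflow that
        // `(len() - 1) as i32` would hit if the list were empty.
        let last_index = tokens.len() as i32 - 1;
        println!("{} tokens, last index {last_index}", tokens.len());
    }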

llama-cpp-2/src/context/params.rs

Lines changed: 8 additions & 7 deletions

@@ -262,7 +262,7 @@ impl LlamaContextParams {
     /// assert_eq!(params.n_threads(), 4);
     /// ```
     #[must_use]
-    pub fn n_threads(&self) -> u32 {
+    pub fn n_threads(&self) -> i32 {
         self.context_params.n_threads
     }
 
@@ -275,7 +275,7 @@ impl LlamaContextParams {
     /// assert_eq!(params.n_threads_batch(), 4);
     /// ```
     #[must_use]
-    pub fn n_threads_batch(&self) -> u32 {
+    pub fn n_threads_batch(&self) -> i32 {
         self.context_params.n_threads_batch
     }
 
@@ -290,7 +290,7 @@ impl LlamaContextParams {
     /// assert_eq!(params.n_threads(), 8);
     /// ```
     #[must_use]
-    pub fn with_n_threads(mut self, n_threads: u32) -> Self {
+    pub fn with_n_threads(mut self, n_threads: i32) -> Self {
         self.context_params.n_threads = n_threads;
         self
     }
@@ -306,7 +306,7 @@ impl LlamaContextParams {
     /// assert_eq!(params.n_threads_batch(), 8);
     /// ```
     #[must_use]
-    pub fn with_n_threads_batch(mut self, n_threads: u32) -> Self {
+    pub fn with_n_threads_batch(mut self, n_threads: i32) -> Self {
         self.context_params.n_threads_batch = n_threads;
         self
     }
@@ -354,9 +354,9 @@ impl LlamaContextParams {
     /// }
     ///
     /// use llama_cpp_2::context::params::LlamaContextParams;
-    /// let params = LlamaContextParams::default();
-    /// params.with_cb_eval(Some(cb_eval_fn));
+    /// let params = LlamaContextParams::default().with_cb_eval(Some(cb_eval_fn));
     /// ```
+    #[must_use]
     pub fn with_cb_eval(
         mut self,
         cb_eval: llama_cpp_sys_2::ggml_backend_sched_eval_callback,
@@ -373,8 +373,9 @@ impl LlamaContextParams {
     /// use llama_cpp_2::context::params::LlamaContextParams;
     /// let params = LlamaContextParams::default();
     /// let user_data = std::ptr::null_mut();
-    /// params.with_cb_eval_user_data(user_data);
+    /// let params = params.with_cb_eval_user_data(user_data);
     /// ```
+    #[must_use]
     pub fn with_cb_eval_user_data(mut self, cb_eval_user_data: *mut std::ffi::c_void) -> Self {
         self.context_params.cb_eval_user_data = cb_eval_user_data;
         self
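
The `#[must_use]` additions and doctest fixes reflect that these setters are by-value builders: they consume `self` and return the updated `LlamaContextParams`, so discarding the return value silently drops the configuration (the old doctests did exactly that). A short sketch using only the accessors shown in this diff:

    use llama_cpp_2::context::params::LlamaContextParams;

    fn main() {
        // Each setter consumes `self` and returns the updated params, so the
        // result must be rebound or chained; #[must_use] now warns otherwise.
        let params = LlamaContextParams::default()
            .with_n_threads(8)
            .with_n_threads_batch(8);
        assert_eq!(params.n_threads(), 8);
        assert_eq!(params.n_threads_batch(), 8);
    }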

llama-cpp-2/src/grammar.rs

Lines changed: 1 addition & 1 deletion

@@ -294,7 +294,7 @@ impl ParseState {
                     type_: gre_type,
                     value: c as _,
                 });
-                if rest.starts_with("-") && rest.get(1..).is_some_and(|r| !r.starts_with("]")) {
+                if rest.starts_with('-') && rest.get(1..).is_some_and(|r| !r.starts_with(']')) {
                     let (c, r) = Self::parse_char(&rest[1..])?;
                     rest = r;
                     rule.push(llama_grammar_element {
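
This is the `clippy::single_char_pattern` fix: a `char` pattern expresses the single-character match directly and lets the standard library skip substring matching. Behaviour is unchanged, as this small standalone check illustrates:

    fn main() {
        let rest = "-z]";
        // A char pattern matches exactly like the one-character &str pattern,
        // but is the form clippy prefers and can be cheaper.
        assert_eq!(rest.starts_with('-'), rest.starts_with("-"));
        assert!(rest.get(1..).is_some_and(|r| !r.starts_with(']')));
    }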

llama-cpp-2/src/model.rs

Lines changed: 1 addition & 1 deletion

@@ -333,7 +333,7 @@ impl LlamaModel {
         let len = string.as_bytes().len();
         let len = c_int::try_from(len).expect("length fits into c_int");
         let buf = string.into_raw();
-        let lstrip = lstrip.map(|it| i32::from(it.get())).unwrap_or(0);
+        let lstrip = lstrip.map_or(0, |it| i32::from(it.get()));
         let size = unsafe {
             llama_cpp_sys_2::llama_token_to_piece(
                 self.model.as_ptr(),
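
The `map_or` form folds the mapping and the default into a single call, which is what `clippy::map_unwrap_or` asks for; the result is identical to `.map(...).unwrap_or(0)`. A standalone sketch, using `NonZeroU8` purely for illustration (the actual `lstrip` type in `model.rs` may differ):

    use std::num::NonZeroU8;

    fn main() {
        // None -> the default 0; Some(n) -> the mapped value.
        let lstrip: Option<NonZeroU8> = NonZeroU8::new(2);
        assert_eq!(lstrip.map_or(0, |it| i32::from(it.get())), 2);
        assert_eq!(None::<NonZeroU8>.map_or(0, |it| i32::from(it.get())), 0);
    }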

llama-cpp-2/src/token_type.rs

Lines changed: 2 additions & 2 deletions

@@ -20,7 +20,7 @@ pub enum LlamaTokenAttr {
     SingleWord = llama_cpp_sys_2::LLAMA_TOKEN_ATTR_SINGLE_WORD as _,
 }
 
-/// A set of LlamaTokenAttrs
+/// A set of `LlamaTokenAttrs`
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct LlamaTokenAttrs(pub BitFlags<LlamaTokenAttr>);
 
@@ -42,7 +42,7 @@ impl TryFrom<llama_cpp_sys_2::llama_token_type> for LlamaTokenAttrs {
     type Error = LlamaTokenTypeFromIntError;
 
     fn try_from(value: llama_cpp_sys_2::llama_vocab_type) -> Result<Self, Self::Error> {
-        Ok(Self(BitFlags::from_bits(value as u32).map_err(|e| {
+        Ok(Self(BitFlags::from_bits(value).map_err(|e| {
             LlamaTokenTypeFromIntError::UnknownValue(e.invalid_bits())
         })?))
     }

llama-cpp-sys-2/llama.cpp
