Commit 870c5c8

Merge branch 'main' into 8-metal-on-mac
2 parents: 8a73403 + ab4da04

File tree: 12 files changed (+123 −82 lines)

.github/workflows/update-toml-version.yaml

Lines changed: 7 additions & 2 deletions
```diff
@@ -17,6 +17,8 @@ jobs:
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
 
       - name: Update version in TOML files
+        env:
+          GH_TOKEN: ${{ github.token }}
         run: |
           # Extract the current version from the TOML file
           CURRENT_VERSION=$(awk -F '"' '/^version/ {print $2}' llama-cpp-2/Cargo.toml)
@@ -32,5 +34,8 @@ jobs:
           git config --global user.name "GitHub Actions"
           git add llama-cpp-sys-2/Cargo.toml llama-cpp-2/Cargo.toml
           git commit -m "Bump version to $NEXT_VERSION [skip ci]"
-          # Push the changes back to the repository
-          git push origin main:$GITHUB_REF
+          # Create a branch for the changes
+          git checkout -b version-bump-$NEXT_VERSION
+          # Push the changes and create a pull request
+          git push origin version-bump-$NEXT_VERSION
+          gh pr create --base main --head version-bump-$NEXT_VERSION --title "Bump version to $NEXT_VERSION"
```
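
The workflow now pushes a dedicated branch and opens a pull request with the `gh` CLI instead of pushing straight to `main`, which is why the step gains `GH_TOKEN` in its environment. The version-bump arithmetic itself is not shown in this hunk; purely as an illustration, a patch-level bump like the one the `$NEXT_VERSION` name suggests could look like this in Rust (hypothetical helper, not part of the repo):

```rust
/// Hypothetical helper mirroring the workflow's shell logic: take a
/// `major.minor.patch` string and bump the patch component.
fn bump_patch(version: &str) -> Option<String> {
    let parts: Vec<u64> = version
        .split('.')
        .map(str::parse)
        .collect::<Result<_, _>>()
        .ok()?;
    match parts.as_slice() {
        [major, minor, patch] => Some(format!("{major}.{minor}.{}", patch + 1)),
        _ => None, // not a three-component version
    }
}

fn main() {
    // e.g. the awk line above would extract something like "0.1.34"
    assert_eq!(bump_patch("0.1.34").as_deref(), Some("0.1.35"));
}
```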

Cargo.lock

Lines changed: 17 additions & 23 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 1 addition & 1 deletion
```diff
@@ -11,7 +11,7 @@ tracing = "0.1"
 hf-hub = { version = "0.3.2" }
 criterion = "0.5.1"
 pprof = "0.13.0"
-bindgen = "0.69.2"
+bindgen = "0.69.4"
 cc = "1.0.83"
 
 [workspace.lints.rust]
```

llama-cpp-2/Cargo.toml

Lines changed: 1 addition & 1 deletion
```diff
@@ -19,7 +19,7 @@ criterion = { workspace = true }
 pprof = { workspace = true, features = ["criterion", "flamegraph"] }
 
 # used in examples
-clap = { version = "4.4.18", features = ["derive"] }
+clap = { version = "4.5.0", features = ["derive"] }
 anyhow = "1.0.79"
 
 [[bench]]
```
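
The clap bump to 4.5.0 stays within the 4.x derive API that the examples rely on, so no code changes accompany it. A self-contained sketch of that API (the `model` field here is hypothetical; only `disable_gpu` appears in the example diff below):

```rust
use clap::Parser;
use std::path::PathBuf;

/// Minimal sketch of the clap 4.x derive API used by the examples.
#[derive(Parser)]
struct Args {
    /// path to the model file (hypothetical field for illustration)
    #[arg(long)]
    model: PathBuf,
    /// whether to disable GPU offload
    #[arg(long)]
    disable_gpu: bool,
}

fn main() {
    let args = Args::parse();
    println!("model: {}, disable_gpu: {}", args.model.display(), args.disable_gpu);
}
```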

llama-cpp-2/benches/grammar_bias.rs

Lines changed: 1 addition & 1 deletion
```diff
@@ -30,7 +30,7 @@ fn criterion_benchmark(c: &mut Criterion) {
         .unwrap();
     let backend = LlamaBackend::init().unwrap();
     let model_params = LlamaModelParams::default();
-    let model = LlamaModel::load_from_file(&backend, &file, &model_params).unwrap();
+    let model = LlamaModel::load_from_file(&backend, file, &model_params).unwrap();
     let mut ctx = model
         .new_context(&backend, LlamaContextParams::default())
         .unwrap();
```
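
Dropping the `&` in front of `file` reads as a needless-borrow cleanup: if `load_from_file` accepts any `impl AsRef<Path>` (an assumption; the exact signature is not shown in this diff), a `&PathBuf` already satisfies the bound and the extra `&` only adds a level of indirection for clippy to warn about. A standalone sketch of the pattern:

```rust
use std::path::{Path, PathBuf};

// Hypothetical stand-in for an API that takes `impl AsRef<Path>`.
fn load_from_file<P: AsRef<Path>>(path: P) {
    println!("loading {}", path.as_ref().display());
}

fn main() {
    let file: PathBuf = PathBuf::from("model.gguf");
    load_from_file(&file); // fine: &PathBuf implements AsRef<Path>
    load_from_file(file);  // also fine, and avoids the extra borrow
}
```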

llama-cpp-2/examples/simple.rs

Lines changed: 25 additions & 20 deletions
```diff
@@ -1,21 +1,20 @@
 //! This is an translation of simple.cpp in llama.cpp using llama-cpp-2.
-#![allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)]
+#![allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation, clippy::cast_precision_loss, clippy::cast_sign_loss)]
 
-use std::io::Write;
-use std::num::NonZeroU32;
-use std::path::PathBuf;
-use std::time::Duration;
+use anyhow::{bail, Context, Result};
 use clap::Parser;
 use llama_cpp_2::context::params::LlamaContextParams;
-use llama_cpp_2::llama_backend::LlamaBackend;
-use llama_cpp_2::model::LlamaModel;
-use llama_cpp_2::model::params::LlamaModelParams;
-use anyhow::{bail, Context, Result};
 use llama_cpp_2::ggml_time_us;
+use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
-use llama_cpp_2::token::data_array::LlamaTokenDataArray;
+use llama_cpp_2::model::params::LlamaModelParams;
 use llama_cpp_2::model::AddBos;
-
+use llama_cpp_2::model::LlamaModel;
+use llama_cpp_2::token::data_array::LlamaTokenDataArray;
+use std::io::Write;
+use std::num::NonZeroU32;
+use std::path::PathBuf;
+use std::time::Duration;
 
 #[derive(clap::Parser)]
 struct Args {
@@ -30,7 +29,6 @@ struct Args {
     disable_gpu: bool,
 }
 
-
 fn main() -> Result<()> {
     let params = Args::parse();
 
@@ -60,12 +58,14 @@ fn main() -> Result<()> {
         .with_n_ctx(NonZeroU32::new(2048))
         .with_seed(1234);
 
-    let mut ctx = model.new_context(&backend, ctx_params)
+    let mut ctx = model
+        .new_context(&backend, ctx_params)
         .with_context(|| "unable to create the llama_context")?;
 
     // tokenize the prompt
 
-    let tokens_list = model.str_to_token(&params.prompt, AddBos::Always)
+    let tokens_list = model
+        .str_to_token(&params.prompt, AddBos::Always)
         .with_context(|| format!("failed to tokenize {}", params.prompt))?;
 
     let n_cxt = ctx.n_ctx() as i32;
@@ -75,8 +75,10 @@ fn main() -> Result<()> {
 
     // make sure the KV cache is big enough to hold all the prompt and generated tokens
     if n_kv_req > n_cxt {
-        bail!("n_kv_req > n_ctx, the required kv cache size is not big enough
-either reduce n_len or increase n_ctx")
+        bail!(
+            "n_kv_req > n_ctx, the required kv cache size is not big enough
+either reduce n_len or increase n_ctx"
+        )
     }
 
     // print the prompt token-by-token
@@ -137,7 +139,6 @@ either reduce n_len or increase n_ctx")
         ctx.decode(&mut batch).with_context(|| "failed to eval")?;
 
         n_decode += 1;
-
     }
 
     eprintln!("\n");
@@ -146,10 +147,14 @@ either reduce n_len or increase n_ctx")
 
     let duration = Duration::from_micros((t_main_end - t_main_start) as u64);
 
-    eprintln!("decoded {} tokens in {:.2} s, speed {:.2} t/s\n", n_decode, duration.as_secs_f32(), n_decode as f32 / duration.as_secs_f32());
+    eprintln!(
+        "decoded {} tokens in {:.2} s, speed {:.2} t/s\n",
+        n_decode,
+        duration.as_secs_f32(),
+        n_decode as f32 / duration.as_secs_f32()
+    );
 
     println!("{}", ctx.timings());
 
     Ok(())
-
-}
+}
```
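
Most of these hunks are pure formatting: method chains split across lines, the `eprintln!` reflowed, and the `bail!` call wrapped, all in rustfmt style, with no behavior change. For context, anyhow's `bail!` is shorthand for `return Err(anyhow::anyhow!(...))`; a self-contained sketch of the KV-cache guard this example uses:

```rust
use anyhow::{bail, Result};

// Sketch of the guard from the example: bail! is shorthand for
// `return Err(anyhow::anyhow!(...))`.
fn check_kv_cache(n_kv_req: i32, n_ctx: i32) -> Result<()> {
    if n_kv_req > n_ctx {
        bail!(
            "n_kv_req > n_ctx, the required kv cache size is not big enough
either reduce n_len or increase n_ctx"
        )
    }
    Ok(())
}

fn main() {
    assert!(check_kv_cache(100, 2048).is_ok());
    assert!(check_kv_cache(4096, 2048).is_err());
}
```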
