Skip to content

Commit 2fbb793

Browse files
authored
Merge branch 'main' into 134-add-windows-to-test-ci
2 parents 9cf0286 + 047898c commit 2fbb793

File tree

22 files changed

+482
-253
lines changed

22 files changed

+482
-253
lines changed

.github/workflows/llama-cpp-rs-check.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
runs-on: ubuntu-latest
1919
steps:
2020
- name: Checkout
21-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
21+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
2222
with:
2323
submodules: recursive
2424
- name: Install Compile Deps
@@ -34,7 +34,7 @@ jobs:
3434
- name: Fmt
3535
run: cargo fmt
3636
- name: Test
37-
run: cargo test
37+
run: cargo test --features sampler
3838
arm64:
3939
name: Check that it builds on various targets
4040
runs-on: ubuntu-latest
@@ -43,13 +43,13 @@ jobs:
4343
target: [ linux/arm64, linux/amd64 ]
4444
steps:
4545
- name: checkout
46-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
46+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
4747
- name: Setup QEMU
4848
uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3
4949
with:
5050
platforms: arm64,amd64
5151
- name: Set up Docker Buildx
52-
uses: docker/setup-buildx-action@0d103c3126aa41d772a8362f6aa67afac040f80c
52+
uses: docker/setup-buildx-action@2b51285047da1547ffb1b2203d8be4c0af6b1f20
5353
- name: Build
5454
uses: docker/build-push-action@v5
5555
with:
@@ -61,24 +61,24 @@ jobs:
6161
runs-on: macos-latest
6262
steps:
6363
- name: checkout
64-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
64+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
6565
with:
6666
submodules: recursive
6767
- name: Setup Rust
6868
uses: dtolnay/rust-toolchain@stable
6969
- name: Build
70-
run: cargo build
70+
run: cargo build --features sampler
7171
windows:
7272
name: Check that it builds on windows
7373
runs-on: windows-latest
7474
steps:
7575
- name: checkout
76-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
76+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
7777
with:
7878
submodules: recursive
7979
- name: Setup Rust
8080
uses: dtolnay/rust-toolchain@stable
8181
- name: Build
82-
run: cargo build
82+
run: cargo build --features sampler
8383
- name: Test
84-
run: cargo test
84+
run: cargo test --features sampler

.github/workflows/publish-upon-release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
runs-on: ubuntu-latest
1515

1616
steps:
17-
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
17+
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
1818
with:
1919
submodules: recursive
2020
- name: Publish crates for llama-cpp-sys-2

.github/workflows/update-llama-cpp.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
steps:
1616
- name: Set date
1717
run: echo "DATE=$(date -I)" >> $GITHUB_ENV
18-
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
18+
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
1919
name: Checkout latest
2020
with:
2121
submodules: recursive

.github/workflows/update-toml-version.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515

1616
steps:
1717
- name: Checkout code
18-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
18+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
1919
with:
2020
submodules: recursive
2121

Cargo.lock

Lines changed: 16 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ criterion = "0.5.1"
1717
pprof = "0.13.0"
1818
bindgen = "0.69.4"
1919
cc = "1.0.90"
20-
anyhow = "1.0.80"
21-
clap = "4.5.2"
20+
anyhow = "1.0.81"
21+
clap = "4.5.3"
2222

2323
[workspace.lints.rust]
2424
missing_docs = { level = "warn" }

embeddings/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
[package]
22
name = "embeddings"
3-
version = "0.1.40"
3+
version = "0.1.43"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
77

88
[dependencies]
9-
llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.40" }
9+
llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.43" }
1010
hf-hub = { workspace = true }
1111
clap = { workspace = true , features = ["derive"] }
1212
anyhow = { workspace = true }

llama-cpp-2/Cargo.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
[package]
22
name = "llama-cpp-2"
33
description = "llama.cpp bindings for Rust"
4-
version = "0.1.40"
4+
version = "0.1.43"
55
edition = "2021"
66
license = "MIT OR Apache-2.0"
77
repository = "https://github.com/utilityai/llama-cpp-rs"
88

99
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1010

1111
[dependencies]
12-
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.40" }
12+
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.43" }
1313
thiserror = { workspace = true }
1414
tracing = { workspace = true }
1515

1616
[features]
1717
cublas = ["llama-cpp-sys-2/cublas"]
18+
sampler = []
1819

1920
[lints]
2021
workspace = true
22+
23+
[package.metadata.docs.rs]
24+
features = ["sampler"]

llama-cpp-2/src/context.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ impl<'model> LlamaContext<'model> {
6969
///
7070
/// # Panics
7171
///
72-
/// - the returned [`c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems)
72+
/// - the returned [`std::ffi::c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems)
7373
pub fn decode(&mut self, batch: &mut LlamaBatch) -> Result<(), DecodeError> {
7474
let result =
7575
unsafe { llama_cpp_sys_2::llama_decode(self.context.as_ptr(), batch.llama_batch) };

llama-cpp-2/src/context/kv_cache.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ impl LlamaContext<'_> {
2222
///
2323
/// * `src` - The sequence id to copy the cache from.
2424
/// * `dest` - The sequence id to copy the cache to.
25-
/// * `p0` - The start position of the cache to clear. If `None`, the entire cache is copied up to [p1].
26-
/// * `p1` - The end position of the cache to clear. If `None`, the entire cache is copied starting from [p0].
25+
/// * `p0` - The start position of the cache to clear. If `None`, the entire cache is copied up to `p1`.
26+
/// * `p1` - The end position of the cache to clear. If `None`, the entire cache is copied starting from `p0`.
2727
pub fn copy_kv_cache_seq(&mut self, src: i32, dest: i32, p0: Option<u16>, p1: Option<u16>) {
2828
let p0 = p0.map_or(-1, i32::from);
2929
let p1 = p1.map_or(-1, i32::from);
@@ -37,8 +37,8 @@ impl LlamaContext<'_> {
3737
/// # Parameters
3838
///
3939
/// * `src` - The sequence id to clear the cache for.
40-
/// * `p0` - The start position of the cache to clear. If `None`, the entire cache is cleared up to [p1].
41-
/// * `p1` - The end position of the cache to clear. If `None`, the entire cache is cleared from [p0].
40+
/// * `p0` - The start position of the cache to clear. If `None`, the entire cache is cleared up to `p1`.
41+
/// * `p1` - The end position of the cache to clear. If `None`, the entire cache is cleared from `p0`.
4242
pub fn clear_kv_cache_seq(&mut self, src: i32, p0: Option<u16>, p1: Option<u16>) {
4343
let p0 = p0.map_or(-1, i32::from);
4444
let p1 = p1.map_or(-1, i32::from);
@@ -68,16 +68,16 @@ impl LlamaContext<'_> {
6868
}
6969

7070
#[allow(clippy::doc_markdown)]
71-
/// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
71+
/// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in `[p0, p1)`
7272
/// If the KV cache is RoPEd, the KV data is updated accordingly:
7373
/// - lazily on next [`LlamaContext::decode`]
7474
/// - explicitly with [`Self::kv_cache_update`]
7575
///
7676
/// # Parameters
7777
///
7878
/// * `seq_id` - The sequence id to update
79-
/// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to [p1].
80-
/// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from [p0].
79+
/// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to `p1`.
80+
/// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from `p0`.
8181
/// * `delta` - The relative position to add to the tokens
8282
pub fn kv_cache_seq_add(&mut self, seq_id: i32, p0: Option<u16>, p1: Option<u16>, delta: i32) {
8383
let p0 = p0.map_or(-1, i32::from);
@@ -95,8 +95,8 @@ impl LlamaContext<'_> {
9595
/// # Parameters
9696
///
9797
/// * `seq_id` - The sequence id to update
98-
/// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to [p1].
99-
/// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from [p0].
98+
/// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to `p1`.
99+
/// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from `p0`.
100100
/// * `d` - The factor to divide the positions by
101101
pub fn kv_cache_seq_div(
102102
&mut self,
@@ -238,11 +238,11 @@ impl<'a> KVCacheView<'a> {
238238
unsafe {
239239
std::slice::from_raw_parts(
240240
self.view.cells_sequences,
241-
usize::try_from(self.view.n_cells * self.view.n_max_seq)
241+
usize::try_from(self.view.n_cells * self.view.n_seq_max)
242242
.expect("failed to fit n_cells * n_max_seq into usize"),
243243
)
244244
}
245-
.chunks(usize::try_from(self.view.n_max_seq).expect("failed to fit n_max_seq into usize"))
245+
.chunks(usize::try_from(self.view.n_seq_max).expect("failed to fit n_max_seq into usize"))
246246
}
247247
}
248248

0 commit comments

Comments (0)