Skip to content

Commit a27029d

Browse files
committed
Merge branch 'dep/update_llama_cpp_b6002' into dennis/feat/multi-modal
Signed-off-by: Dennis Keck <[email protected]>
2 parents b5cb56b + 56ae827 commit a27029d

File tree

16 files changed

+415
-98
lines changed

16 files changed

+415
-98
lines changed

.github/workflows/llama-cpp-rs-check.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,13 @@ jobs:
3232
- name: Clippy
3333
run: cargo clippy
3434
- name: Fmt
35-
run: cargo fmt
35+
run: cargo fmt --check
3636
- name: Test
3737
run: cargo test --features sampler
38+
- name: Dry-Run Publishing llama-cpp-sys-2 Crate
39+
run: RUST_BACKTRACE=1 cargo publish --package llama-cpp-sys-2 --verbose --dry-run
40+
- name: Dry-Run Publishing llama-cpp-2 Crate
41+
run: RUST_BACKTRACE=1 cargo publish --package llama-cpp-2 --verbose --dry-run
3842
arm64:
3943
name: Check that it builds on various targets
4044
runs-on: ubuntu-latest
@@ -49,7 +53,7 @@ jobs:
4953
with:
5054
platforms: arm64,amd64
5155
- name: Set up Docker Buildx
52-
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2
56+
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435
5357
- name: Build
5458
uses: docker/build-push-action@v6
5559
with:

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[submodule "llama-cpp-sys-2/llama.cpp"]
22
path = llama-cpp-sys-2/llama.cpp
3-
url = https://github.com/ggerganov/llama.cpp
3+
url = https://github.com/ggml-org/llama.cpp

Cargo.lock

Lines changed: 18 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ hf-hub = { version = "0.3.2" }
2020
criterion = "0.5.1"
2121
pprof = "0.13.0"
2222
bindgen = "0.69.5"
23-
cc = "1.2.23"
23+
cc = "1.2.30"
2424
anyhow = "1.0.98"
25-
clap = "4.5.38"
25+
clap = "4.5.41"
2626
encoding_rs = "0.8.35"
2727
tracing-subscriber = { version = "0.3", features = ["json"] }
2828

examples/embeddings/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "embeddings"
3-
version = "0.1.107"
3+
version = "0.1.113"
44
edition = "2021"
55

66
[dependencies]

examples/simple/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "simple"
3-
version = "0.1.107"
3+
version = "0.1.113"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

examples/simple/src/main.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ fn main() -> Result<()> {
138138
} = Args::parse();
139139

140140
if verbose {
141-
tracing_subscriber::fmt().init();
141+
// tracing_subscriber::fmt().init();
142+
tracing_subscriber::fmt::init();
142143
}
143144
send_logs_to_tracing(LogOptions::default().with_logs_enabled(verbose));
144145

llama-cpp-2/Cargo.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
[package]
22
name = "llama-cpp-2"
33
description = "llama.cpp bindings for Rust"
4-
version = "0.1.107"
4+
version = "0.1.113"
55
edition = "2021"
66
license = "MIT OR Apache-2.0"
77
repository = "https://github.com/utilityai/llama-cpp-rs"
88

99
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1010

1111
[dependencies]
12-
enumflags2 = "0.7.11"
13-
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.69" }
12+
enumflags2 = "0.7.12"
13+
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.113" }
1414
thiserror = { workspace = true }
1515
tracing = { workspace = true }
1616
tracing-core = { workspace = true }
@@ -33,7 +33,7 @@ android-shared-stdcxx = ["llama-cpp-sys-2/shared-stdcxx"]
3333

3434

3535
[target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
36-
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.69", features = [
36+
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.113", features = [
3737
"metal",
3838
] }
3939

llama-cpp-2/src/context/kv_cache.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -209,11 +209,4 @@ impl LlamaContext<'_> {
209209
pub fn kv_cache_update(&mut self) {
210210
unsafe { llama_cpp_sys_2::llama_kv_self_update(self.context.as_ptr()) }
211211
}
212-
213-
/// Returns the number of tokens in the KV cache (slow, use only for debug)
214-
/// If a KV cell has multiple sequences assigned to it, it will be counted multiple times
215-
#[must_use]
216-
pub fn get_kv_cache_token_count(&self) -> i32 {
217-
unsafe { llama_cpp_sys_2::llama_kv_self_n_tokens(self.context.as_ptr()) }
218-
}
219212
}

llama-cpp-2/src/context/params.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,36 @@ impl LlamaContextParams {
514514
pub fn pooling_type(&self) -> LlamaPoolingType {
515515
LlamaPoolingType::from(self.context_params.pooling_type)
516516
}
517+
518+
/// Set whether to use full sliding window attention
519+
///
520+
/// # Examples
521+
///
522+
/// ```rust
523+
/// use llama_cpp_2::context::params::LlamaContextParams;
524+
/// let params = LlamaContextParams::default()
525+
/// .with_swa_full(false);
526+
/// assert_eq!(params.swa_full(), false);
527+
/// ```
528+
#[must_use]
529+
pub fn with_swa_full(mut self, enabled: bool) -> Self {
530+
self.context_params.swa_full = enabled;
531+
self
532+
}
533+
534+
/// Get whether full sliding window attention is enabled
535+
///
536+
/// # Examples
537+
///
538+
/// ```rust
539+
/// use llama_cpp_2::context::params::LlamaContextParams;
540+
/// let params = LlamaContextParams::default();
541+
/// assert_eq!(params.swa_full(), true);
542+
/// ```
543+
#[must_use]
544+
pub fn swa_full(&self) -> bool {
545+
self.context_params.swa_full
546+
}
517547
}
518548

519549
/// Default parameters for `LlamaContext`. (as defined in llama.cpp by `llama_context_default_params`)

0 commit comments

Comments (0)