Skip to content

Commit 4ab26a1

Browse files
committed
Bump llama.cpp to latest as of 11/3/24 (b4020)
* Half-working sampling refactor (grammar support still missing) * Removed the tail-free sampler, since llama.cpp removed it
1 parent fa10708 commit 4ab26a1

File tree

8 files changed

+386
-639
lines changed

8 files changed

+386
-639
lines changed

examples/simple/src/main.rs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -252,7 +252,6 @@ either reduce n_len or increase n_ctx"
252252
{
253253
// sample the most likely token
254254
let new_token_id = sampler.sample(&mut ctx, None);
255-
sampler.accept(new_token_id);
256255

257256
// is it an end of stream?
258257
if model.is_eog_token(new_token_id) {

llama-cpp-2/src/context.rs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@ use crate::{
1717

1818
pub mod kv_cache;
1919
pub mod params;
20-
pub mod sample;
2120
pub mod session;
2221

2322
/// Safe wrapper around `llama_context`.

llama-cpp-2/src/context/sample.rs

Lines changed: 0 additions & 141 deletions
This file was deleted.

llama-cpp-2/src/context/sample/sampler.rs

Lines changed: 0 additions & 112 deletions
This file was deleted.

llama-cpp-2/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ use std::path::PathBuf;
2323
use std::string::FromUtf8Error;
2424

2525
pub mod context;
26-
pub mod grammar;
26+
// pub mod grammar;
2727
pub mod llama_backend;
2828
pub mod llama_batch;
2929
pub mod model;

llama-cpp-2/src/sampler_chain.rs

Lines changed: 25 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,14 @@ impl Debug for LlamaSampler {
2424
}
2525

2626
impl LlamaSampler {
27+
/// Create a new `LlamaSampler`.
28+
/// ```
29+
/// # use llama_cpp_2::sampler_chain::{LlamaSampler, params::LlamaSamplerChainParams};
30+
/// let mut chain = LlamaSampler::new(LlamaSamplerChainParams::default());
31+
/// chain = chain.add_temp(0.7);
32+
/// chain = chain.add_dist(42);
33+
/// assert_eq!(chain.len(), 2);
34+
/// ```
2735
pub fn new(sampler_chain_params: params::LlamaSamplerChainParams) -> Self {
2836
let sampler = unsafe {
2937
NonNull::new(llama_cpp_sys_2::llama_sampler_chain_init(
@@ -120,22 +128,6 @@ impl LlamaSampler {
120128
self
121129
}
122130

123-
/// Initialize a tail-free sampler with the given z value and add it to the sampler chain.
124-
///
125-
/// Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
126-
pub fn add_tail_free(self, z: f32, min_keep: usize) -> Self {
127-
unsafe {
128-
let tail_free_sampler =
129-
NonNull::new(llama_cpp_sys_2::llama_sampler_init_tail_free(z, min_keep))
130-
.expect("llama_sampler_chain_init_tail_free returned null");
131-
llama_cpp_sys_2::llama_sampler_chain_add(
132-
self.sampler.as_ptr(),
133-
tail_free_sampler.as_ptr(),
134-
);
135-
}
136-
self
137-
}
138-
139131
/// Initialize a typical-p sampler with the given value and add it to the sampler chain.
140132
pub fn add_typical_p(self, p: f32, min_keep: usize) -> Self {
141133
unsafe {
@@ -209,6 +201,23 @@ impl LlamaSampler {
209201
self
210202
}
211203

204+
/// Initialize an XTC sampler with the given values and add it to the sampler chain.
205+
pub fn add_xtc(self, p: f32, t: f32, min_keep: usize, seed: u32) -> Self {
206+
unsafe {
207+
let xtc_sampler = NonNull::new(llama_cpp_sys_2::llama_sampler_init_xtc(
208+
p, t, min_keep, seed,
209+
))
210+
.expect("llama_sampler_chain_init_xtc returned null");
211+
llama_cpp_sys_2::llama_sampler_chain_add(self.sampler.as_ptr(), xtc_sampler.as_ptr());
212+
}
213+
self
214+
}
215+
216+
/// Get the number of samplers in the chain.
217+
pub fn len(&self) -> i32 {
218+
unsafe { llama_cpp_sys_2::llama_sampler_chain_n(self.sampler.as_ptr()) }
219+
}
220+
212221
/// Reset the sampler chain.
213222
pub fn reset(&self) {
214223
unsafe {
@@ -225,13 +234,6 @@ impl LlamaSampler {
225234
LlamaToken(token)
226235
}
227236

228-
/// Accept a sampled token.
229-
pub fn accept(&self, token: LlamaToken) {
230-
unsafe {
231-
llama_cpp_sys_2::llama_sampler_accept(self.sampler.as_ptr(), token.0);
232-
}
233-
}
234-
235237
/// Reset the timings for the sampler.
236238
pub fn reset_timings(&self) {
237239
unsafe {

0 commit comments

Comments (0)