Skip to content

Commit 2fbb793

Browse files
authored
Merge branch 'main' into 134-add-windows-to-test-ci
2 parents 9cf0286 + 047898c commit 2fbb793

File tree

22 files changed

+482
-253
lines changed

22 files changed

+482
-253
lines changed

.github/workflows/llama-cpp-rs-check.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
runs-on: ubuntu-latest
1919
steps:
2020
- name: Checkout
21-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
21+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
2222
with:
2323
submodules: recursive
2424
- name: Install Compile Deps
@@ -34,7 +34,7 @@ jobs:
3434
- name: Fmt
3535
run: cargo fmt
3636
- name: Test
37-
run: cargo test
37+
run: cargo test --features sampler
3838
arm64:
3939
name: Check that it builds on various targets
4040
runs-on: ubuntu-latest
@@ -43,13 +43,13 @@ jobs:
4343
target: [ linux/arm64, linux/amd64 ]
4444
steps:
4545
- name: checkout
46-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
46+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
4747
- name: Setup QEMU
4848
uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3
4949
with:
5050
platforms: arm64,amd64
5151
- name: Set up Docker Buildx
52-
uses: docker/setup-buildx-action@0d103c3126aa41d772a8362f6aa67afac040f80c
52+
uses: docker/setup-buildx-action@2b51285047da1547ffb1b2203d8be4c0af6b1f20
5353
- name: Build
5454
uses: docker/build-push-action@v5
5555
with:
@@ -61,24 +61,24 @@ jobs:
6161
runs-on: macos-latest
6262
steps:
6363
- name: checkout
64-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
64+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
6565
with:
6666
submodules: recursive
6767
- name: Setup Rust
6868
uses: dtolnay/rust-toolchain@stable
6969
- name: Build
70-
run: cargo build
70+
run: cargo build --features sampler
7171
windows:
7272
name: Check that it builds on windows
7373
runs-on: windows-latest
7474
steps:
7575
- name: checkout
76-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
76+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
7777
with:
7878
submodules: recursive
7979
- name: Setup Rust
8080
uses: dtolnay/rust-toolchain@stable
8181
- name: Build
82-
run: cargo build
82+
run: cargo build --features sampler
8383
- name: Test
84-
run: cargo test
84+
run: cargo test --features sampler

.github/workflows/publish-upon-release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
runs-on: ubuntu-latest
1515

1616
steps:
17-
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
17+
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
1818
with:
1919
submodules: recursive
2020
- name: Publish crates for llama-cpp-sys-2

.github/workflows/update-llama-cpp.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
steps:
1616
- name: Set date
1717
run: echo "DATE=$(date -I)" >> $GITHUB_ENV
18-
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
18+
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
1919
name: Checkout latest
2020
with:
2121
submodules: recursive

.github/workflows/update-toml-version.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515

1616
steps:
1717
- name: Checkout code
18-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
18+
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633
1919
with:
2020
submodules: recursive
2121

Cargo.lock

Lines changed: 16 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ criterion = "0.5.1"
1717
pprof = "0.13.0"
1818
bindgen = "0.69.4"
1919
cc = "1.0.90"
20-
anyhow = "1.0.80"
21-
clap = "4.5.2"
20+
anyhow = "1.0.81"
21+
clap = "4.5.3"
2222

2323
[workspace.lints.rust]
2424
missing_docs = { level = "warn" }

embeddings/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
[package]
22
name = "embeddings"
3-
version = "0.1.40"
3+
version = "0.1.43"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
77

88
[dependencies]
9-
llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.40" }
9+
llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.43" }
1010
hf-hub = { workspace = true }
1111
clap = { workspace = true , features = ["derive"] }
1212
anyhow = { workspace = true }

llama-cpp-2/Cargo.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
[package]
22
name = "llama-cpp-2"
33
description = "llama.cpp bindings for Rust"
4-
version = "0.1.40"
4+
version = "0.1.43"
55
edition = "2021"
66
license = "MIT OR Apache-2.0"
77
repository = "https://github.com/utilityai/llama-cpp-rs"
88

99
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1010

1111
[dependencies]
12-
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.40" }
12+
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.43" }
1313
thiserror = { workspace = true }
1414
tracing = { workspace = true }
1515

1616
[features]
1717
cublas = ["llama-cpp-sys-2/cublas"]
18+
sampler = []
1819

1920
[lints]
2021
workspace = true
22+
23+
[package.metadata.docs.rs]
24+
features = ["sampler"]

llama-cpp-2/src/context.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ impl<'model> LlamaContext<'model> {
6969
///
7070
/// # Panics
7171
///
72-
/// - the returned [`c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems)
72+
/// - the returned [`std::ffi::c_int`] from llama-cpp does not fit into a i32 (this should never happen on most systems)
7373
pub fn decode(&mut self, batch: &mut LlamaBatch) -> Result<(), DecodeError> {
7474
let result =
7575
unsafe { llama_cpp_sys_2::llama_decode(self.context.as_ptr(), batch.llama_batch) };

llama-cpp-2/src/context/kv_cache.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ impl LlamaContext<'_> {
2222
///
2323
/// * `src` - The sequence id to copy the cache from.
2424
/// * `dest` - The sequence id to copy the cache to.
25-
/// * `p0` - The start position of the cache to clear. If `None`, the entire cache is copied up to [p1].
26-
/// * `p1` - The end position of the cache to clear. If `None`, the entire cache is copied starting from [p0].
25+
/// * `p0` - The start position of the cache to clear. If `None`, the entire cache is copied up to `p1`.
26+
/// * `p1` - The end position of the cache to clear. If `None`, the entire cache is copied starting from `p0`.
2727
pub fn copy_kv_cache_seq(&mut self, src: i32, dest: i32, p0: Option<u16>, p1: Option<u16>) {
2828
let p0 = p0.map_or(-1, i32::from);
2929
let p1 = p1.map_or(-1, i32::from);
@@ -37,8 +37,8 @@ impl LlamaContext<'_> {
3737
/// # Parameters
3838
///
3939
/// * `src` - The sequence id to clear the cache for.
40-
/// * `p0` - The start position of the cache to clear. If `None`, the entire cache is cleared up to [p1].
41-
/// * `p1` - The end position of the cache to clear. If `None`, the entire cache is cleared from [p0].
40+
/// * `p0` - The start position of the cache to clear. If `None`, the entire cache is cleared up to `p1`.
41+
/// * `p1` - The end position of the cache to clear. If `None`, the entire cache is cleared from `p0`.
4242
pub fn clear_kv_cache_seq(&mut self, src: i32, p0: Option<u16>, p1: Option<u16>) {
4343
let p0 = p0.map_or(-1, i32::from);
4444
let p1 = p1.map_or(-1, i32::from);
@@ -68,16 +68,16 @@ impl LlamaContext<'_> {
6868
}
6969

7070
#[allow(clippy::doc_markdown)]
71-
/// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
71+
/// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in `[p0, p1)`
7272
/// If the KV cache is RoPEd, the KV data is updated accordingly:
7373
/// - lazily on next [`LlamaContext::decode`]
7474
/// - explicitly with [`Self::kv_cache_update`]
7575
///
7676
/// # Parameters
7777
///
7878
/// * `seq_id` - The sequence id to update
79-
/// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to [p1].
80-
/// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from [p0].
79+
/// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to `p1`.
80+
/// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from `p0`.
8181
/// * `delta` - The relative position to add to the tokens
8282
pub fn kv_cache_seq_add(&mut self, seq_id: i32, p0: Option<u16>, p1: Option<u16>, delta: i32) {
8383
let p0 = p0.map_or(-1, i32::from);
@@ -95,8 +95,8 @@ impl LlamaContext<'_> {
9595
/// # Parameters
9696
///
9797
/// * `seq_id` - The sequence id to update
98-
/// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to [p1].
99-
/// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from [p0].
98+
/// * `p0` - The start position of the cache to update. If `None`, the entire cache is updated up to `p1`.
99+
/// * `p1` - The end position of the cache to update. If `None`, the entire cache is updated starting from `p0`.
100100
/// * `d` - The factor to divide the positions by
101101
pub fn kv_cache_seq_div(
102102
&mut self,
@@ -238,11 +238,11 @@ impl<'a> KVCacheView<'a> {
238238
unsafe {
239239
std::slice::from_raw_parts(
240240
self.view.cells_sequences,
241-
usize::try_from(self.view.n_cells * self.view.n_max_seq)
241+
usize::try_from(self.view.n_cells * self.view.n_seq_max)
242242
.expect("failed to fit n_cells * n_max_seq into usize"),
243243
)
244244
}
245-
.chunks(usize::try_from(self.view.n_max_seq).expect("failed to fit n_max_seq into usize"))
245+
.chunks(usize::try_from(self.view.n_seq_max).expect("failed to fit n_max_seq into usize"))
246246
}
247247
}
248248

0 commit comments

Comments (0)