
Commit d83319c

Merge branch 'main' into feat/expose-ggml-kv-types

2 parents: 615ab2e + e2accc4

1 file changed: +26 −1 lines

llama-cpp-2/src/context/params.rs

Lines changed: 26 additions & 1 deletion
@@ -669,11 +669,36 @@ impl LlamaContextParams {
         self.context_params.swa_full
     }
 
-    /// Set the KV cache data type for K
+    /// Set the max number of sequences (i.e. distinct states for recurrent models)
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use llama_cpp_2::context::params::LlamaContextParams;
+    /// let params = LlamaContextParams::default()
+    ///     .with_n_seq_max(64);
+    /// assert_eq!(params.n_seq_max(), 64);
+    /// ```
+    #[must_use]
+    pub fn with_n_seq_max(mut self, n_seq_max: u32) -> Self {
+        self.context_params.n_seq_max = n_seq_max;
+        self
+    }
+
+    /// Get the max number of sequences (i.e. distinct states for recurrent models)
     ///
     /// # Examples
     ///
     /// ```rust
+    /// use llama_cpp_2::context::params::LlamaContextParams;
+    /// let params = LlamaContextParams::default();
+    /// assert_eq!(params.n_seq_max(), 1);
+    /// ```
+    #[must_use]
+    pub fn n_seq_max(&self) -> u32 {
+        self.context_params.n_seq_max
+    }
+
+    /// Set the KV cache data type for K
     /// use llama_cpp_2::context::params::{LlamaContextParams, KvCacheType};
     /// let params = LlamaContextParams::default().with_type_k(KvCacheType::Q4_0);
     /// assert_eq!(params.type_k(), KvCacheType::Q4_0);
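
For readers skimming the diff, here is a minimal usage sketch of the builder-style API this commit extends. It assumes only the accessors visible in this file (the `with_n_seq_max`/`n_seq_max` pair added here, plus the existing `with_type_k`/`type_k` shown in the surrounding doc comments); it is illustrative, not part of the commit.

```rust
use llama_cpp_2::context::params::{KvCacheType, LlamaContextParams};

fn main() {
    // Builder-style configuration: each `with_*` method consumes the params
    // value and returns the updated value.
    let params = LlamaContextParams::default()
        .with_n_seq_max(8)               // up to 8 distinct sequences / recurrent states
        .with_type_k(KvCacheType::Q4_0); // quantized K cache, as in the existing doc example

    // Read the values back with the matching getters.
    assert_eq!(params.n_seq_max(), 8);   // default is 1, per the new doc test
    assert_eq!(params.type_k(), KvCacheType::Q4_0);
}
```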
