
Commit 3c27f80 (parent: 0dfbbdc)

fixed clippy errors

2 files changed: 27 additions, 12 deletions


llama-cpp-2/src/context/sample.rs

Lines changed: 5 additions & 5 deletions
@@ -184,7 +184,7 @@ impl LlamaContext<'_> {
         LlamaToken(token)
     }
 
-    /// Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
+    /// Tail Free Sampling described in [Tail-Free-Sampling](https://www.trentonbricken.com/Tail-Free-Sampling/).
     pub fn sample_tail_free(&self, token_data: &mut LlamaTokenDataArray, z: f32, min_keep: usize) {
         let ctx = self.context.as_ptr();
         unsafe {
@@ -194,7 +194,7 @@ impl LlamaContext<'_> {
         }
     }
 
-    /// Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
+    /// Locally Typical Sampling implementation described in the [paper](https://arxiv.org/abs/2202.00666).
     pub fn sample_typical(&self, token_data: &mut LlamaTokenDataArray, p: f32, min_keep: usize) {
         let ctx = self.context.as_ptr();
         unsafe {
@@ -204,7 +204,7 @@ impl LlamaContext<'_> {
         }
     }
 
-    /// Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751"
+    /// Nucleus sampling described in academic paper [The Curious Case of Neural Text Degeneration](https://arxiv.org/abs/1904.09751)"
     pub fn sample_top_p(&self, token_data: &mut LlamaTokenDataArray, p: f32, min_keep: usize) {
         let ctx = self.context.as_ptr();
         unsafe {
@@ -214,7 +214,7 @@ impl LlamaContext<'_> {
         }
     }
 
-    /// Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841"
+    /// Minimum P sampling as described in [#3841](https://github.com/ggerganov/llama.cpp/pull/3841)
     pub fn sample_min_p(
         &self,
         llama_token_data: &mut LlamaTokenDataArray,
@@ -229,7 +229,7 @@ impl LlamaContext<'_> {
         }
     }
 
-    /// Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
+    /// Top-K sampling described in academic paper [The Curious Case of Neural Text Degeneration](https://arxiv.org/abs/1904.09751)
     pub fn sample_top_k(&self, token_data: &mut LlamaTokenDataArray, k: i32, min_keep: usize) {
         let ctx = self.context.as_ptr();
         unsafe {
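These doc-comment fixes cover the crate's whole sampler family, so a minimal sketch of how the methods might be chained may help. This is not usage documented by the commit: the `use` paths are assumed from the crate layout, the numeric parameters are arbitrary, and `sample_min_p`'s trailing parameters are cut off in the hunk above and assumed to be `(p, min_keep)` like its siblings.

```rust
// Sketch only: module paths and sample_min_p's full signature are assumptions.
use llama_cpp_2::context::LlamaContext;
use llama_cpp_2::token::data_array::LlamaTokenDataArray;

fn truncate_candidates(ctx: &LlamaContext<'_>, candidates: &mut LlamaTokenDataArray) {
    // Each call mutates `candidates` in place, per the signatures in the diff.
    // Keep only the 40 most likely tokens (Top-K).
    ctx.sample_top_k(candidates, 40, 1);
    // Trim the low-probability tail (Tail Free Sampling, z = 0.95).
    ctx.sample_tail_free(candidates, 0.95, 1);
    // Locally typical sampling with p = 0.95.
    ctx.sample_typical(candidates, 0.95, 1);
    // Nucleus sampling: smallest set with cumulative probability >= 0.9.
    ctx.sample_top_p(candidates, 0.9, 1);
    // Min-P (llama.cpp PR #3841): drop tokens below 5% of the top token's
    // probability; (p, min_keep) argument order is an assumption here.
    ctx.sample_min_p(candidates, 0.05, 1);
}
```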

llama-cpp-2/src/context/session.rs

Lines changed: 22 additions & 7 deletions
@@ -35,6 +35,15 @@ pub enum LoadSessionError {
     /// failed to convert path to str
     #[error("failed to convert path {0} to str")]
     PathToStrError(PathBuf),
+
+    /// Insufficient max length
+    #[error("max_length is not large enough to hold {n_out} (was {max_tokens})")]
+    InsufficientMaxLength {
+        /// The length of the session file
+        n_out: usize,
+        /// The maximum length
+        max_tokens: usize,
+    },
 }
 
 impl LlamaContext<'_> {
@@ -44,9 +53,9 @@ impl LlamaContext<'_> {
     ///
     /// * `path_session` - The file to save to.
     /// * `tokens` - The tokens to associate the session with. This should be a prefix of a sequence of tokens that the context has processed, so that the relevant KV caches are already filled.
-    /// 
+    ///
     /// # Errors
-    /// 
+    ///
     /// Fails if the path is not a valid utf8, is not a valid c string, or llama.cpp fails to save the session file.
     pub fn save_session_file(
         &self,
@@ -64,7 +73,7 @@ impl LlamaContext<'_> {
             llama_cpp_sys_2::llama_save_session_file(
                 self.context.as_ptr(),
                 cstr.as_ptr(),
-                tokens.as_ptr() as *const i32,
+                tokens.as_ptr().cast::<llama_cpp_sys_2::llama_token>(),
                 tokens.len(),
             )
         } {
@@ -81,9 +90,9 @@ impl LlamaContext<'_> {
     ///
     /// * `path_session` - The file to load from. It must be a session file from a compatible context, otherwise the function will error.
     /// * `max_tokens` - The maximum token length of the loaded session. If the session was saved with a longer length, the function will error.
-    /// 
+    ///
     /// # Errors
-    /// 
+    ///
     /// Fails if the path is not a valid utf8, is not a valid c string, or llama.cpp fails to load the session file. (e.g. the file does not exist, is not a session file, etc.)
     pub fn load_session_file(
         &mut self,
@@ -103,11 +112,17 @@ impl LlamaContext<'_> {
             if llama_cpp_sys_2::llama_load_session_file(
                 self.context.as_ptr(),
                 cstr.as_ptr(),
-                tokens.as_mut_ptr().cast::<i32>(),
+                // cast is valid as LlamaToken is repr(transparent)
+                Vec::<LlamaToken>::as_mut_ptr(&mut tokens).cast::<llama_cpp_sys_2::llama_token>(),
                 max_tokens,
                 &mut n_out,
             ) {
-                assert!(n_out <= max_tokens, "n_out is greater than max_tokens");
+                if n_out > max_tokens {
+                    return Err(LoadSessionError::InsufficientMaxLength {
+                        n_out,
+                        max_tokens,
+                    });
+                }
                 tokens.set_len(n_out);
                 Ok(tokens)
            } else {
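The new `// cast is valid as LlamaToken is repr(transparent)` comment is the crux of both pointer changes in this file: a `#[repr(transparent)]` wrapper is guaranteed to have the same layout and ABI as its single field, so a pointer to the wrapper can be reinterpreted as a pointer to the field at the FFI boundary. A self-contained illustration of the same pattern (the `Token` type below is a stand-in, not the crate's `LlamaToken`):

```rust
#[repr(transparent)]
struct Token(i32); // stand-in for LlamaToken wrapping a C llama_token (i32)

fn main() {
    let mut tokens = vec![Token(1), Token(2), Token(3)];
    // Same cast shape as in load_session_file: the Vec's buffer of Token
    // is viewed as a buffer of the underlying i32 for the C side to fill.
    let raw: *mut i32 = tokens.as_mut_ptr().cast::<i32>();
    unsafe { *raw.add(1) = 42 }; // models the C library writing through it
    assert_eq!(tokens[1].0, 42); // the write is visible through the wrapper
}
```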

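With the `assert!` replaced by the dedicated `InsufficientMaxLength` variant, callers can now recover from an undersized token budget instead of aborting. A hedged sketch of such a caller; the hunk does not show `load_session_file`'s full parameter types, so the path argument and `use` paths here are assumptions:

```rust
// Sketch only: parameter types and module paths are assumed, not confirmed
// by the diff above.
use llama_cpp_2::context::LlamaContext;
use llama_cpp_2::context::session::LoadSessionError;

fn restore(ctx: &mut LlamaContext<'_>, budget: usize) {
    match ctx.load_session_file("session.bin", budget) {
        Ok(tokens) => println!("restored {} tokens", tokens.len()),
        // New in this commit: recoverable instead of a panicking assert.
        Err(LoadSessionError::InsufficientMaxLength { n_out, max_tokens }) => {
            eprintln!("session holds {n_out} tokens but only {max_tokens} were requested");
        }
        Err(e) => eprintln!("failed to load session: {e}"),
    }
}
```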