Skip to content

Commit fff5031

Browse files
authored
Merge pull request #46 from utilityai/batch-add-error-handeling
Batch add error handling
2 parents 3b741af + 23cdc63 commit fff5031

File tree

3 files changed

+18
-6
lines changed

3 files changed

+18
-6
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

llama-cpp-2/examples/simple.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ either reduce n_len or increase n_ctx")
9696
for (i, token) in (0_i32..).zip(tokens_list.into_iter()) {
9797
// llama_decode will output logits only for the last token of the prompt
9898
let is_last = i == last_index;
99-
batch.add(token, i, &[0], is_last);
99+
batch.add(token, i, &[0], is_last)?;
100100
}
101101

102102
ctx.decode(&mut batch)
@@ -129,7 +129,7 @@ either reduce n_len or increase n_ctx")
129129
std::io::stdout().flush()?;
130130

131131
batch.clear();
132-
batch.add(new_token_id, n_cur, &[0], true);
132+
batch.add(new_token_id, n_cur, &[0], true)?;
133133
}
134134

135135
n_cur += 1;

llama-cpp-2/src/llama_batch.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ pub struct LlamaBatch {
1414
pub(crate) llama_batch: llama_batch,
1515
}
1616

17+
/// Errors that can occur when adding a token to a batch.
18+
#[derive(thiserror::Error, Debug)]
19+
pub enum BatchAddError {
20+
/// There was not enough space in the batch to add the token.
21+
#[error("Insufficient Space of {0}")]
22+
InsufficientSpace(usize),
23+
}
24+
1725
impl LlamaBatch {
1826
/// Clear the batch. This does not free the memory associated with the batch, but it does reset
1927
/// the number of tokens to 0.
@@ -35,8 +43,10 @@ impl LlamaBatch {
3543
pos: llama_pos,
3644
seq_ids: &[i32],
3745
logits: bool,
38-
) {
39-
assert!(self.allocated > (usize::try_from(self.n_tokens() + 1).expect("self.n_tokens does not fit into a usize")), "there are only {} tokens allocated for the batch, but {} tokens in the batch when you tried to add one", self.allocated, self.n_tokens());
46+
) -> Result<(), BatchAddError> {
47+
if self.allocated < usize::try_from(self.n_tokens() + 1).expect("cannot fit n_tokens into a usize") {
48+
return Err(BatchAddError::InsufficientSpace(self.allocated))
49+
}
4050
let offset = self.llama_batch.n_tokens;
4151
let offset_usize = usize::try_from(offset).expect("cannot fit n_tokens into a usize");
4252
unsafe {
@@ -66,6 +76,8 @@ impl LlamaBatch {
6676

6777
// batch.n_tokens++;
6878
self.llama_batch.n_tokens += 1;
79+
80+
Ok(())
6981
}
7082
/// Create a new `LlamaBatch` that cab contain up to `n_tokens` tokens.
7183
///

0 commit comments

Comments
 (0)