Added max_total_tokens attribute to class Generator, fixed type annotations

tokoba · tokoba · commit 809b0dab40c5 · 2025-05-11T17:37:47.000+09:00
diff --git a/exllamav3/generator/generator.py b/exllamav3/generator/generator.py
@@ -80,6 +80,7 @@ def __init__(
 
         # Paging
         self.pagetable = PageTable(self, cache)
+        self.max_total_tokens = PAGE_SIZE * self.pagetable.max_pages
 
         # Draft model
         self.draft_model = draft_model
@@ -563,7 +564,7 @@ def generate(
         prompt: list[tuple] | list[str] | tuple | str,
         max_new_tokens: int | None = None,
         min_new_tokens: int = 0,
-        seed: int or None = None,
+        seed: int | None = None,
         sampler: Sampler | list[Sampler] | None = None,
         token_healing: bool = False,
         encode_special_tokens: bool = False,
diff --git a/exllamav3/generator/job.py b/exllamav3/generator/job.py
@@ -16,7 +16,7 @@
 from ..util.tensor import SeqTensor
 
 # Convert list of strings to UTF32 format to pass by reference to partial matching function
-def _strings_to_utf32(strings: list[str]) -> (np.array, list[int]):
+def _strings_to_utf32(strings: list[str]) -> tuple[np.ndarray, np.ndarray] | None:
 
     if not strings: return bytearray(), None