We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fe331ec · commit 7fc7bc3 — Copy full SHA for 7fc7bc3
llama_cpp/llama.py
@@ -735,10 +735,10 @@ def _create_completion(
735
try:
736
cache_item = self.cache[prompt_tokens]
737
cache_prefix_len = Llama.longest_token_prefix(
738
- cache_item.eval_tokens, prompt_tokens
+ cache_item.input_ids.tolist(), prompt_tokens
739
)
740
eval_prefix_len = Llama.longest_token_prefix(
741
- self.eval_tokens, prompt_tokens
+ self._input_ids.tolist(), prompt_tokens
742
743
if cache_prefix_len > eval_prefix_len:
744
self.load_state(cache_item)
0 commit comments