2 files changed, +11 -8 lines changed

@@ -275,16 +275,16 @@ def batched_generate_fn(
         )
         # We may need the last time slice of `all_logits` below:
         all_logits = model(inputs, input_pos=start)
-        if start == 0:
-            max_tokens_forward = model.kv_cache_max_tokens_forward()
-            if prompt_chunksize > max_tokens_forward:
-                print(
-                    f"prompt_chunksize = {prompt_chunksize} > {max_tokens_forward} = max_tokens_forward. Lowering it to the latter.")
-                prompt_chunksize = max_tokens_forward
-        start = token_pos
         if token_pos == min_prompt_size:
             break
-        chunksize = min(prompt_chunksize, min_prompt_size - token_pos)
+        start = token_pos
+        # Note that `max_tokens_forward` can change during the course of
+        # prompt processing:
+        chunksize = min((
+            prompt_chunksize,
+            model.kv_cache_max_tokens_forward(),
+            min_prompt_size - token_pos
+        ))
         token_pos += chunksize

         # Generation loop: One token per iteration
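
For context, here is a minimal, self-contained sketch (not the library code) of the prompt-processing loop after this change. The DummyModel class, the process_prompt helper, and the toy numbers are made up for illustration; only the names prompt_chunksize, min_prompt_size, token_pos, chunksize, and kv_cache_max_tokens_forward() come from the diff above. The point is that the forward limit is re-queried for every chunk rather than only once at start == 0.

# Minimal sketch of chunked prompt processing with a per-chunk limit.
# `DummyModel` is a made-up stand-in whose KV-cache forward limit shrinks
# as the cache fills up; the real model and logits handling are elided.

class DummyModel:
    def __init__(self, cache_length: int = 10):
        self.cache_length = cache_length
        self.filled = 0  # token positions already written to the cache

    def kv_cache_max_tokens_forward(self) -> int:
        # May return a different value on every call, which is why the new
        # code recomputes `chunksize` in every loop iteration.
        return max(1, self.cache_length - self.filled)

    def __call__(self, num_tokens: int) -> None:
        self.filled = min(self.cache_length, self.filled + num_tokens)


def process_prompt(model: DummyModel, min_prompt_size: int, prompt_chunksize: int) -> list:
    token_pos = 0
    chunks = []
    while token_pos < min_prompt_size:
        # Cap the chunk by the requested chunk size, the current forward
        # limit of the KV cache, and the remaining prompt length:
        chunksize = min(
            prompt_chunksize,
            model.kv_cache_max_tokens_forward(),
            min_prompt_size - token_pos,
        )
        model(chunksize)
        chunks.append(chunksize)
        token_pos += chunksize
    return chunks


print(process_prompt(DummyModel(cache_length=10), min_prompt_size=12, prompt_chunksize=8))
# Prints [8, 2, 1, 1]: later chunks are capped by the shrinking cache limit.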

@@ -151,6 +151,9 @@ def next_token_pos(self) -> Optional[int]:
     @property
     def max_tokens_forward(self) -> int:
         """
+        Note that this limit may change during the course of the generation
+        for certain caches.
+
         Returns:
             Maximum number of token positions which can be treated in
             :meth:`forward`. Depends on cache, but is `<= cache_length`
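
To make the added docstring note concrete, here is a hypothetical cache whose max_tokens_forward limit shrinks as positions are written, which is the kind of behavior the note warns about. Only the property name and the `<= cache_length` invariant come from the diff; the ToyKVCache class and its internals are invented for illustration.

# Hypothetical example: a cache whose `max_tokens_forward` changes during
# generation. `ToyKVCache` is not part of the library; it only illustrates
# why callers should re-read the property instead of caching it once.

class ToyKVCache:
    def __init__(self, cache_length: int):
        self.cache_length = cache_length
        self.next_pos = 0  # number of token positions already written

    @property
    def max_tokens_forward(self) -> int:
        # Always <= cache_length, but shrinks as the cache fills up:
        return max(1, self.cache_length - self.next_pos)

    def forward(self, num_tokens: int) -> None:
        assert num_tokens <= self.max_tokens_forward
        self.next_pos = min(self.cache_length, self.next_pos + num_tokens)


cache = ToyKVCache(cache_length=8)
print(cache.max_tokens_forward)  # 8
cache.forward(6)
print(cache.max_tokens_forward)  # 2: the limit changed mid-generation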