Skip to content

Commit 63553a8

Browse files
authored
open cache_max_entry_count >= 1 for Turbomind backend (#3913)
1 parent 33eafd2 commit 63553a8

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

lmdeploy/messages.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,9 @@ class TurbomindEngineConfig:
195195
be allocated to the k/v cache.
196196
For lmdeploy versions greater than `v0.2.1`, it defaults to 0.8,
197197
signifying the percentage of FREE GPU memory to be reserved for
198-
the k/v cache
198+
the k/v cache.
199+
When it's an integer > 0, it represents the total number of k/v
200+
blocks.
199201
cache_chunk_size (int): The policy to apply for KV block from
200202
the block manager, default to -1.
201203
cache_block_seq_len (int): the length of the token sequence in
@@ -262,8 +264,7 @@ def __post_init__(self):
262264
"""Check input validation."""
263265
assert self.dtype in ['auto', 'float16', 'bfloat16']
264266
assert self.tp >= 1, 'tp must be a positive integer'
265-
assert 0 < self.cache_max_entry_count < 1, \
266-
'invalid cache_max_entry_count'
267+
assert self.cache_max_entry_count > 0, 'invalid cache_max_entry_count'
267268
assert self.quant_policy in (0, 4, 8), 'invalid quant_policy'
268269
assert self.rope_scaling_factor >= 0, 'invalid rope_scaling_factor'
269270
assert self.max_prefill_token_num >= 0, \

0 commit comments

Comments (0)