@@ -195,7 +195,9 @@ class TurbomindEngineConfig:
195195 be allocated to the k/v cache.
196196 For lmdeploy versions greater than `v0.2.1`, it defaults to 0.8,
197197 signifying the percentage of FREE GPU memory to be reserved for
198- the k/v cache
198+ the k/v cache.
199+ When it's an integer > 0, it represents the total number of k/v
200+ blocks.
199201 cache_chunk_size (int): The policy to apply for KV block from
200202 the block manager, defaults to -1.
201203 cache_block_seq_len (int): the length of the token sequence in
@@ -262,8 +264,7 @@ def __post_init__(self):
262264 """Check input validation."""
263265 assert self .dtype in ['auto' , 'float16' , 'bfloat16' ]
264266 assert self .tp >= 1 , 'tp must be a positive integer'
265- assert 0 < self .cache_max_entry_count < 1 , \
266- 'invalid cache_max_entry_count'
267+ assert self .cache_max_entry_count > 0 , 'invalid cache_max_entry_count'
267268 assert self .quant_policy in (0 , 4 , 8 ), 'invalid quant_policy'
268269 assert self .rope_scaling_factor >= 0 , 'invalid rope_scaling_factor'
269270 assert self .max_prefill_token_num >= 0 , \
0 commit comments