@@ -17,16 +17,18 @@ FIFOScheduler::FIFOScheduler(const rtp_llm::GptInitParameter& params,
1717 max_seq_len_ (params.max_seq_len_),
1818 max_batch_tokens_size_ (params.max_batch_tokens_size_),
1919 max_generate_batch_size_ (params.max_generate_batch_size_),
20- reserve_block_num_ (params.scheduler_reserve_resource_ratio_ * cache_manager->availableBlockNums () / 100),
2120 // fallback is not supported when using pd_separation:use_cache_store
2221 enable_partial_fallback_ (params.enable_partial_fallback_ && params.role_type_ == RoleType::PDFUSION),
2322 enable_whole_fallback_ (params.role_type_ == RoleType::PDFUSION),
2423 enable_fast_gen_ (params.enable_fast_gen_),
2524 need_fill_fake_stream_ (params.dp_size_ > 1 && params.tp_rank_ == 0 ),
2625 fast_gen_max_context_len_ (params.fast_gen_max_context_len_),
2726 metrics_reporter_ (metrics_reporter) {
28- RTP_LLM_LOG_INFO (" max_generate_batch_size %d" , max_generate_batch_size_);
29- RTP_LLM_LOG_INFO (" max_batch_tokens_size %d" , max_batch_tokens_size_);
27+ reserve_block_num_ = params.scheduler_reserve_resource_ratio_ * cache_manager->availableBlockNums () / 100 ;
28+ RTP_LLM_LOG_INFO (" max_generate_batch_size is [%d], max_batch_tokens_size is [%d], reserve_block_num is [%d]" ,
29+ max_generate_batch_size_,
30+ max_batch_tokens_size_,
31+ reserve_block_num_);
3032}
3133
3234FIFOScheduler::~FIFOScheduler () {
@@ -228,13 +230,27 @@ bool FIFOScheduler::evaluateNewStream(const list<GenerateStreamPtr>& streams,
228230 return false ;
229231 }
230232
231- auto result = new_stream->initKVBlock (token_capacity_, reserve_step);
233+ auto old_blocks = new_stream->maxBlockSize ();
234+ auto result = new_stream->initKVBlock (token_capacity_, reserve_step);
232235 if (result.ok () && enable_fast_gen_) {
233236 token_capacity_ -= result.value ();
234237 RTP_LLM_LOG_DEBUG (
235238 " after stream [%ld] acquireCapacity, token_capacity is %d" , new_stream->streamId (), token_capacity_);
236239 }
237- return result.ok () && cache_manager_->availableBlockNums () >= reserve_block_num_;
240+ if (result.ok ()) {
241+ if (cache_manager_->availableBlockNums () >= reserve_block_num_) {
242+ return true ;
243+ } else {
244+ RTP_LLM_LOG_INFO (
245+ " current availableBlockNums is [%ld], reserve_block_num is [%ld], so stream [%ld] malloc failed" ,
246+ cache_manager_->availableBlockNums (),
247+ reserve_block_num_,
248+ new_stream->streamId ());
249+ new_stream->tryReleaseKVBlock (new_stream->maxBlockSize () - old_blocks);
250+ return false ;
251+ }
252+ }
253+ return false ;
238254}
239255
240256list<GenerateStreamPtr> FIFOScheduler::scheduleNew (size_t reserve_step) {
0 commit comments