
Commit aa54363

bugfix: fix duplicated execution of 'decr_pending_requests' in streaming mode. (#240)
Signed-off-by: pengtao.156 <[email protected]>
1 parent c536a06 commit aa54363

File tree

3 files changed: +13 -5 lines changed
xllm/core/runtime/llm_master.cpp

Lines changed: 6 additions & 2 deletions
@@ -174,7 +174,6 @@ void LLMMaster::handle_request(std::string prompt,
   auto cb = [callback = std::move(callback),
              scheduler = scheduler_.get()](const RequestOutput& output) {
     output.log_request_status();
-    scheduler->decr_pending_requests();
     return callback(output);
   };
   // add into the queue
@@ -186,6 +185,9 @@ void LLMMaster::handle_request(std::string prompt,
                              call]() mutable {
     AUTO_COUNTER(request_handling_latency_seconds_completion);
 
+    // remove the pending request after scheduling
+    SCOPE_GUARD([this] { scheduler_->decr_pending_requests(); });
+
     Timer timer;
     // verify the prompt
     if (!sp.verify_params(callback)) {
@@ -214,7 +216,6 @@ void LLMMaster::handle_request(std::vector<Message> messages,
   auto cb = [callback = std::move(callback),
              scheduler = scheduler_.get()](const RequestOutput& output) {
     output.log_request_status();
-    scheduler->decr_pending_requests();
     return callback(output);
   };
   // add into the queue
@@ -226,6 +227,9 @@ void LLMMaster::handle_request(std::vector<Message> messages,
                              call]() mutable {
     AUTO_COUNTER(request_handling_latency_seconds_chat);
 
+    // remove the pending request after scheduling
+    SCOPE_GUARD([this] { scheduler_->decr_pending_requests(); });
+
     // verify the prompt
     if (!sp.verify_params(callback)) {
       return;
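Why this works: SCOPE_GUARD defers the lambda to the end of the enclosing scope, so the decrement now runs exactly once per scheduled request, on every exit path (including the early return when verify_params fails), rather than once per invocation of the output callback. A minimal sketch of such a guard, purely as illustration; xllm's actual SCOPE_GUARD definition may differ:

#include <utility>

// Hypothetical stand-in for xllm's SCOPE_GUARD; illustration only.
template <typename F>
class ScopeGuard {
 public:
  explicit ScopeGuard(F f) : f_(std::move(f)) {}
  ~ScopeGuard() { f_(); }  // fires exactly once, on any exit path
  ScopeGuard(const ScopeGuard&) = delete;
  ScopeGuard& operator=(const ScopeGuard&) = delete;

 private:
  F f_;
};

#define SG_CAT_INNER(a, b) a##b
#define SG_CAT(a, b) SG_CAT_INNER(a, b)
#define SCOPE_GUARD(fn) ScopeGuard SG_CAT(scope_guard_, __LINE__)(fn)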

xllm/core/runtime/vlm_master.cpp

Lines changed: 6 additions & 2 deletions
@@ -207,7 +207,6 @@ void VLMMaster::handle_request(const std::string& prompt,
   auto cb = [callback = std::move(callback),
              scheduler = scheduler_.get()](const RequestOutput& output) {
     output.log_request_status();
-    scheduler->decr_pending_requests();
     return callback(output);
   };
 
@@ -218,6 +217,9 @@ void VLMMaster::handle_request(const std::string& prompt,
                              callback = std::move(cb)]() mutable {
     AUTO_COUNTER(request_handling_latency_seconds_completion);
 
+    // remove the pending request after scheduling
+    SCOPE_GUARD([this] { scheduler_->decr_pending_requests(); });
+
     Timer timer;
     // verify the prompt
     if (!sp.verify_params(callback)) {
@@ -245,7 +247,6 @@ void VLMMaster::handle_request(const std::vector<Message>& messages,
   auto cb = [callback = std::move(callback),
              scheduler = scheduler_.get()](const RequestOutput& output) {
     output.log_request_status();
-    scheduler->decr_pending_requests();
     return callback(output);
   };
 
@@ -256,6 +257,9 @@ void VLMMaster::handle_request(const std::vector<Message>& messages,
                              callback = std::move(cb)]() mutable {
     AUTO_COUNTER(request_handling_latency_seconds_chat);
 
+    // remove the pending request after scheduling
+    SCOPE_GUARD([this] { scheduler_->decr_pending_requests(); });
+
     // verify the prompt
     if (!sp.verify_params(callback)) {
       return;
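VLMMaster's two handlers receive the identical change. To see why the old placement misbehaved, recall from the commit message that in streaming mode the output callback executes repeatedly per request; a toy illustration (not xllm code) of the resulting duplicated decrement:

#include <iostream>
#include <string>

int main() {
  int pending = 1;  // one request admitted
  // Old placement: the decrement lives inside the per-output callback.
  auto cb = [&pending](const std::string& chunk) {
    --pending;  // in streaming mode this runs once per chunk
    std::cout << chunk << " -> pending=" << pending << "\n";
  };
  for (const char* chunk : {"Hel", "lo", "!"}) {
    cb(chunk);  // three chunks -> three decrements for one request
  }
  // pending ends at -2 instead of 0; moving the decrement behind a
  // scope guard in the scheduling path makes it run exactly once.
}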

xllm/core/scheduler/continuous_scheduler.cpp

Lines changed: 1 addition & 1 deletion
@@ -940,7 +940,7 @@ void ContinuousScheduler::step_with_schedule_overlap(
 }
 
 void ContinuousScheduler::generate() {
-  bool batch_empty = true;
+  bool batch_empty = false;
   while (num_pending_requests() > 0 || !batch_empty) {
     // build a batch of requests/sequences
     auto batch = prepare_batch();
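The initializer flip is presumably a consequence of the earlier decrement: since pending requests are now removed right after scheduling, generate() can be entered with num_pending_requests() already at zero while a batch is still in flight, and starting batch_empty as true would skip the loop entirely. Starting it as false guarantees at least one iteration, like a do-while. A toy model of the loop entry (names simplified, not xllm code):

#include <iostream>

int main() {
  int num_pending_requests = 0;  // counter already decremented after scheduling
  bool batch_empty = false;      // new initializer: first iteration always runs
  int steps = 0;
  while (num_pending_requests > 0 || !batch_empty) {
    batch_empty = (++steps >= 3);  // pretend the batch drains after 3 steps
    std::cout << "step " << steps << "\n";
  }
  // With the old `bool batch_empty = true;`, the condition would be false
  // on entry and the in-flight request would never be stepped.
}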
