Skip to content

Commit 57b7f22

Browse files
committed
Set scheduler v1 as default
1 parent 84719cc commit 57b7f22

File tree

3 files changed

+11
-4
lines changed

3 files changed

+11
-4
lines changed

fastdeploy/engine/common_engine.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -552,8 +552,6 @@ def _fetch_request():
552552
get_request_pool.submit(_fetch_request)
553553
# 2. Schedule requests
554554
tasks = self.resource_manager.schedule()
555-
main_process_metrics.num_requests_waiting.dec(len(tasks))
556-
main_process_metrics.num_requests_running.inc(len(tasks))
557555
# 3. Send to engine
558556
if tasks:
559557
self.resource_manager.get_real_bsz()

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ def _trigger_preempt(self, request, num_new_blocks, preempted_reqs, scheduled_re
123123
self.to_be_rescheduled_request_id_set.add(preempted_req.request_id)
124124
preempted_reqs.append(preempted_req)
125125
scheduled_reqs.append(self._prepare_preempt_task(preempted_req))
126+
main_process_metrics.num_requests_waiting.inc(1)
127+
main_process_metrics.num_requests_running.dec(1)
126128
if preempted_req == request:
127129
# No more request to preempt.
128130
can_schedule = False
@@ -369,6 +371,8 @@ def schedule(self):
369371
token_budget -= num_new_tokens
370372
request.num_computed_tokens += num_new_tokens
371373
request.status = RequestStatus.RUNNING
374+
main_process_metrics.num_requests_waiting.dec(1)
375+
main_process_metrics.num_requests_running.inc(1)
372376
allocated_position = self.get_available_position()
373377
request.idx = allocated_position
374378
self.tasks_list[allocated_position] = request
@@ -399,6 +403,8 @@ def schedule(self):
399403
token_budget -= num_new_tokens
400404
request.num_computed_tokens += num_new_tokens
401405
request.status = RequestStatus.RUNNING
406+
main_process_metrics.num_requests_waiting.dec(1)
407+
main_process_metrics.num_requests_running.inc(1)
402408
else:
403409
if self.config.cache_config.enable_prefix_caching:
404410
self._free_blocks(request)

tests/utils/test_config.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import unittest
22

3+
from fastdeploy import envs
34
from fastdeploy.config import (
45
CacheConfig,
56
FDConfig,
@@ -48,7 +49,8 @@ def test_fdconfig_max_num_tokens(self):
4849
ips="0.0.0.0",
4950
test_mode=True,
5051
)
51-
assert fd_config.max_num_batched_tokens == 2048
52+
if not envs.ENABLE_V1_KVCACHE_SCHEDULER:
53+
assert fd_config.max_num_batched_tokens == 2048
5254

5355
cache_config.enable_chunked_prefill = False
5456
fd_config = FDConfig(
@@ -58,7 +60,8 @@ def test_fdconfig_max_num_tokens(self):
5860
ips="0.0.0.0",
5961
test_mode=True,
6062
)
61-
assert fd_config.max_num_batched_tokens == 8192
63+
if not envs.ENABLE_V1_KVCACHE_SCHEDULER:
64+
assert fd_config.max_num_batched_tokens == 8192
6265

6366
def test_fdconfig_init_cache(self):
6467
parallel_config = ParallelConfig({})

0 commit comments

Comments
 (0)