Skip to content

Commit cff106e

Browse files
committed
[CI] Add unit tests about concurrent partial prefills
Signed-off-by: Csrayz <[email protected]>
1 parent 510f455 commit cff106e

File tree

2 files changed

+26
-2
lines changed

2 files changed

+26
-2
lines changed

tests/ut/core/test_schedule_config.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ def test_initialize_from_config_with_override(self):
5151
num_scheduler_steps=1,
5252
scheduler_cls="vllm_ascend.core.scheduler.AscendScheduler",
5353
max_num_batched_tokens=2048,
54+
max_long_partial_prefills=1,
55+
long_prefill_token_threshold=512,
5456
),
5557
)
5658
self.assertEqual(ascend_config.enable_chunked_prefill, False)
@@ -60,7 +62,8 @@ def test_initialize_from_config_with_override(self):
6062
"vllm_ascend.core.scheduler.AscendScheduler")
6163
self.assertEqual(ascend_config.max_num_batched_tokens, 2048)
6264
self.assertEqual(ascend_config.encoder_cache_size, 2048)
63-
65+
self.assertEqual(ascend_config.max_long_partial_prefills, 1)
66+
self.assertEqual(ascend_config.long_prefill_token_threshold, 512)
6467
def test_not_implemented_policy(self):
6568
with self.assertRaises(NotImplementedError) as context:
6669
AscendSchedulerConfig.initialize_from_config(

tests/ut/core/test_scheduler.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ def test_get_num_unfinished_requests(self):
226226
len(requests) - i - 1)
227227

228228
def test_schedule(self):
229-
'''Test scheduling.
229+
'''Test scheduling.
230230
Two cases: default APC/no prompt logprobs; APC=True + prompt logprobs
231231
'''
232232
scheduler = self.create_scheduler()
@@ -251,6 +251,27 @@ def test_schedule(self):
251251
for i, request in enumerate(requests):
252252
self.assertEqual(scheduler.running[i], request)
253253

254+
def test_concurrent_partial_prefills_schedule(self):
255+
'''Test concurrent partial prefills scheduling.
256+
total requests = 10, every request has 20 tokens.
257+
With long_prefill_token_threshold = 1 every request counts as long, so the scheduler can
258+
only schedule max_long_partial_prefills long requests at a time.
259+
'''
260+
scheduler = self.create_scheduler()
261+
scheduler.scheduler_config.chunked_prefill_enabled = False
262+
scheduler.scheduler_config.max_long_partial_prefills = 2
263+
scheduler.scheduler_config.long_prefill_token_threshold = 1
264+
requests = create_requests(num_requests=10, num_tokens=20)
265+
for request in requests:
266+
scheduler.add_request(request)
267+
268+
# Test initial scheduling
269+
output = scheduler.schedule()
270+
self.assertEqual(len(output.scheduled_new_reqs),
271+
scheduler.scheduler_config.max_long_partial_prefills)
272+
self.assertEqual(output.scheduled_cached_reqs.num_reqs, 0)
273+
self.assertEqual(len(output.finished_req_ids), 0)
274+
254275
def test_schedule_enable_prefix_caching(self):
255276
'''Test scheduling.
256277
Two cases: default APC/no prompt logprobs; APC=True + prompt logprobs

0 commit comments

Comments
 (0)