Skip to content

Commit 97c0fc8

Browse files
authored
Merge pull request #1 from NumberWan/ttft_routing
[Feat] - New feature: Add configurable unfinished query limit in multi-round-qa.py
2 parents 0980f72 + d899ec7 commit 97c0fc8

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

benchmarks/multi-round-qa/multi-round-qa.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,9 @@ class WorkloadConfig:
4040
# Whether to include user id in request header
4141
enable_user_id: bool
4242

43+
# Max number of unfinished queries allowed (None means no limit)
44+
max_unfinished_queries: Optional[int]
45+
4346

4447
@dataclass
4548
class UserConfig:
@@ -419,6 +422,13 @@ def step(self, timestamp: float, executor: RequestExecutor):
419422
if self.start_time is None:
420423
self.start_time = timestamp
421424

425+
pending_queries = len([s for s in self.sessions if s.has_unfinished_request])
426+
# Only check limit if max_unfinished_queries is set
427+
if (self.workload_config.max_unfinished_queries is not None and
428+
pending_queries > self.workload_config.max_unfinished_queries):
429+
logger.info(f"unfinished queries >{self.workload_config.max_unfinished_queries}, waiting")
430+
return
431+
422432
if timestamp - self.last_user_join > self.gap_between_users:
423433
self._create_user_session()
424434
self.last_user_join = timestamp
@@ -625,6 +635,12 @@ def parse_arguments() -> WorkloadConfig:
625635
parser.add_argument(
626636
"--sharegpt", action="store_true", help="Whether to use ShareGPT dataset"
627637
)
638+
parser.add_argument(
639+
"--max-unfinished-queries",
640+
type=int,
641+
default=None,
642+
help="Maximum number of unfinished queries allowed (default: no limit)",
643+
)
628644
args = parser.parse_args()
629645
return args
630646

@@ -675,6 +691,7 @@ def main():
675691
qps=args.qps,
676692
model=args.model,
677693
enable_user_id=args.request_with_user_id,
694+
max_unfinished_queries=args.max_unfinished_queries,
678695
)
679696

680697
manager = UserSessionManager(

0 commit comments

Comments
 (0)