
Commit c4abe9b

feat: Add prompt > seq_len k8 tests. (#3930)
Signed-off-by: [email protected] <[email protected]>
1 parent 94ad508 commit c4abe9b

7 files changed: +863 / -104 lines changed


tests/fault_tolerance/deploy/README.md

Lines changed: 13 additions & 1 deletion
@@ -119,6 +119,17 @@ The following failure types are defined in `scenarios.py`:
 | `sglang_prefill_scheduler` | Terminate SGLang prefill scheduler process. | `SIGKILL` to `sglang::scheduler`| sglang only |
 | `sglang_prefill_detokenizer` | Terminate SGLang prefill detokenizer process. | `SIGKILL` to `sglang::detokenizer`| sglang only |
 
+#### Token Overflow Tests
+
+In addition to process and pod failures, the suite includes tests for **token overflow**, where the model receives an input prompt larger than its configured `max_seq_len`. These tests are crucial for verifying that the system can gracefully reject invalid requests without crashing.
+
+- **Failure Injection**: Unlike other tests, this failure is injected from the **client side**. The `aiperf` client is configured to send a batch of requests with oversized token lengths.
+- **Two-Phase Execution**: These tests run in two distinct phases, creating separate log directories for each:
+  1. **`overflow` Phase**: Sends oversized requests. The expected outcome is a high rate of failed requests (rejections) as the server correctly identifies and blocks them.
+  2. **`recovery` Phase**: Immediately after the overflow phase, sends valid, normal-sized requests. The expected outcome is a high success rate, confirming that the system has recovered and remains operational.
+
+The combined results of these two phases demonstrate both the system's ability to reject invalid inputs and its stability after handling them.
+
 #### Example Scenario Breakdown
 
 **Scenario**: `sglang-agg-tp-2-dp-1-decode_worker`
@@ -392,7 +403,6 @@ graph LR
     style DecodePool stroke:#000,stroke-width:2px
 ```
 
-
 #### Summary:
 
 
@@ -596,3 +606,5 @@ Test Group: vllm-agg-tp-1-dp-2
 ╘═══════════════════╧═══════════╧═══════════╧══════════╧═══════════╧══════════╧═══════════╧═══════════╧════════════╛
 
 ```
+
+
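
To make the overflow/recovery flow described in the README addition concrete, here is a minimal, hypothetical sketch of the two phases. The real suite drives the `aiperf` client; this sketch instead uses the plain `requests` library against an OpenAI-compatible completions endpoint, and the URL, model name, prompt sizes, and request counts are illustrative assumptions, not values from this commit.

```python
# Hypothetical sketch of the overflow -> recovery flow described in the README.
# The actual tests use the aiperf client; endpoint, model, and sizes below are
# illustrative assumptions only.
import requests

BASE_URL = "http://localhost:8000/v1/completions"  # assumed endpoint
MODEL = "example-model"                             # assumed model name


def send_prompt(num_words: int) -> bool:
    """Send one completion request; return True if the server accepted it."""
    prompt = " ".join(["word"] * num_words)
    resp = requests.post(
        BASE_URL,
        json={"model": MODEL, "prompt": prompt, "max_tokens": 8},
        timeout=60,
    )
    return resp.status_code == 200


# Phase 1 ("overflow"): oversized prompts should be rejected, not crash the server.
overflow_results = [send_prompt(num_words=100_000) for _ in range(10)]
assert not any(overflow_results), "oversized prompts were unexpectedly accepted"

# Phase 2 ("recovery"): normal prompts should succeed, proving the system recovered.
recovery_results = [send_prompt(num_words=32) for _ in range(10)]
assert all(recovery_results), "valid prompts failed after the overflow phase"
```

In the real tests, each phase writes to its own log directory and the pass/fail decision combines the rejection rate of phase 1 with the success rate of phase 2.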

tests/fault_tolerance/deploy/legacy_parse_results.py

Lines changed: 16 additions & 13 deletions
@@ -418,14 +418,15 @@ def process_test_directory(test_dir, sla):
     }
 
 
-def main(logs_dir, tablefmt, log_paths=None, sla=None):
+def main(logs_dir, tablefmt, log_paths=None, sla=None, print_output=True):
     """Main entry point for parsing legacy client results.
 
     Args:
         logs_dir: Base directory containing test results
         tablefmt: Table format for output (e.g., "fancy_grid")
         log_paths: Optional list of specific log paths to process
         sla: Optional SLA threshold for latency violations
+        print_output: If True, print tables and summaries. If False, only return results.
     """
     results = []
 
@@ -542,19 +543,21 @@ def main(logs_dir, tablefmt, log_paths=None, sla=None):
         ]
         rows.append(row)
 
-        print(f"\nTest Group: {test_prefix}")
-        print(
-            tabulate(
-                rows,
-                headers,
-                tablefmt=tablefmt,
-                floatfmt=".2f",
-                missingval="N/A",
-                numalign="right",
-                stralign="center",
+        if print_output:
+            logging.info(f"\nTest Group: {test_prefix}")
+            logging.info(
+                "\n"
+                + tabulate(
+                    rows,
+                    headers,
+                    tablefmt=tablefmt,
+                    floatfmt=".2f",
+                    missingval="N/A",
+                    numalign="right",
+                    stralign="center",
+                )
             )
-        )
-        print("\n" + "=" * 80)
+            logging.info("\n" + "=" * 80)
 
 
 if __name__ == "__main__":
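
A hedged usage sketch of the updated `main()` signature above: callers that only want the parsed results can now pass `print_output=False` to skip the `logging.info` table output. The import path, the logs directory, and the assumption that `main()` returns the parsed results (implied by the added docstring) are illustrative; only the keyword names come from the diff.

```python
# Hypothetical caller of legacy_parse_results.main() with the new print_output flag.
import logging

from tests.fault_tolerance.deploy import legacy_parse_results  # assumed import path

# logging.info output only appears if the root logger is configured at INFO level.
logging.basicConfig(level=logging.INFO)

# Quiet mode: collect parsed results programmatically without emitting tables.
results = legacy_parse_results.main(
    logs_dir="/tmp/ft_logs",  # assumed results directory
    tablefmt="fancy_grid",
    print_output=False,
)

# Verbose mode: the same call with print_output=True logs the per-group tables.
legacy_parse_results.main(
    logs_dir="/tmp/ft_logs",
    tablefmt="fancy_grid",
    print_output=True,
)
```

Switching from `print` to `logging.info` also means the table output can be routed or silenced through standard logging configuration rather than stream redirection.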

tests/fault_tolerance/deploy/parse_factory.py

Lines changed: 19 additions & 9 deletions
@@ -103,7 +103,9 @@ def parse_test_results(
     log_paths: Optional[List[str]] = None,
     tablefmt: str = "grid",
     sla: Optional[float] = None,
+    success_threshold: float = 90.0,
     force_parser: Optional[str] = None,
+    print_output: bool = True,
 ) -> Any:
     """Auto-detect and parse test results using the appropriate parser.
 
@@ -116,8 +118,10 @@ def parse_test_results(
         log_paths: List of log directories to process (for multiple directories)
         tablefmt: Table format for output (e.g., "fancy_grid", "pipe")
         sla: Optional SLA threshold for latency violations
+        success_threshold: Success rate threshold for pass/fail (default: 90.0)
         force_parser: Optional override to force using a specific parser
             ("aiperf" or "legacy"). If not provided, auto-detection is used.
+        print_output: If True, print tables and summaries. If False, only return results.
 
     Returns:
         Results from the appropriate parser
@@ -189,13 +193,17 @@ def parse_test_results(
                 log_paths=log_paths,
                 tablefmt=tablefmt,
                 sla=sla,
+                success_threshold=success_threshold,
+                print_output=print_output,
             )
         else:
             return parse_aiperf(
                 logs_dir=log_dir,
                 log_paths=None,
                 tablefmt=tablefmt,
                 sla=sla,
+                success_threshold=success_threshold,
+                print_output=print_output,
             )
 
     elif parser_type == "legacy":
@@ -209,13 +217,15 @@ def parse_test_results(
                 log_paths=log_paths,
                 tablefmt=tablefmt,
                 sla=sla,
+                print_output=print_output,
            )
         else:
             return parse_legacy(
                 logs_dir=log_dir,
                 log_paths=None,
                 tablefmt=tablefmt,
                 sla=sla,
+                print_output=print_output,
             )
 
     else:
@@ -294,18 +304,18 @@ def print_result_info(log_dir: str) -> None:
     """
     info = get_result_info(log_dir)
 
-    print(f"\nTest Results Information: {log_dir}")
-    print("=" * 60)
-    print(f"Result Type: {info['type'] or 'Unknown'}")
-    print(f"Client Count: {info['client_count']}")
-    print(f"Has Test Log: {info['has_test_log']}")
+    logging.info(f"\nTest Results Information: {log_dir}")
+    logging.info("=" * 60)
+    logging.info(f"Result Type: {info['type'] or 'Unknown'}")
+    logging.info(f"Client Count: {info['client_count']}")
+    logging.info(f"Has Test Log: {info['has_test_log']}")
 
     if info["details"]:
-        print("\nDetails:")
+        logging.info("\nDetails:")
         for key, value in info["details"].items():
-            print(f"  {key}: {value}")
+            logging.info(f"  {key}: {value}")
 
-    print("=" * 60)
+    logging.info("=" * 60)
 
 
 if __name__ == "__main__":
@@ -354,7 +364,7 @@ def print_result_info(log_dir: str) -> None:
             for log_path in args.log_paths:
                 print_result_info(log_path)
         else:
-            print("Error: Must provide log_dir or --log-paths")
+            logging.error("Must provide log_dir or --log-paths")
     else:
         # Parse mode
         try:
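
To orient readers to the extended `parse_test_results()` signature, here is a hedged usage sketch. The diff does not show the function's first parameter, so it is passed positionally below; the import path, log directory, SLA value, and threshold are illustrative assumptions, and only the keyword names (`tablefmt`, `sla`, `success_threshold`, `force_parser`, `print_output`) come from the diff.

```python
# Hypothetical caller of the extended parse_test_results() signature.
import logging

from tests.fault_tolerance.deploy.parse_factory import parse_test_results  # assumed import path

logging.basicConfig(level=logging.INFO)

results = parse_test_results(
    "/tmp/ft_logs/vllm-agg-tp-1-dp-2",  # first argument (name elided in the diff); assumed base results directory
    tablefmt="fancy_grid",
    sla=2.0,                  # optional latency SLA (units assumed)
    success_threshold=95.0,   # stricter pass/fail bar than the 90.0 default
    force_parser=None,        # keep auto-detection between "aiperf" and "legacy"
    print_output=False,       # suppress logging.info tables; just return parsed results
)
```

Note that `success_threshold` is only forwarded to the aiperf parser in this commit; the legacy parser receives `print_output` but not the threshold.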
