File tree Expand file tree Collapse file tree 2 files changed +9
-0
lines changed
tests/integration/defs/perf Expand file tree Collapse file tree 2 files changed +9
-0
lines changed Original file line number Diff line number Diff line change @@ -342,6 +342,11 @@ def import_allowed_perf_config():
342342 PerfMetricType .KV_CACHE_SIZE :
343343 re .compile (r".*(?:Allocated ([\d\.]+) GiB for max tokens in paged KV cache|"
344344 r"Final KV cache size after resize: ([\d\.]+) GiB).*" ),
345+ PerfMetricType .PER_USER_OUTPUT_THROUGHPUT :
346+ re .compile (
347+ r"Per User Output Throughput \[w\/ ctx\] \(tps\/user\):\s+([\d\.]+)" ),
348+ PerfMetricType .PER_GPU_OUTPUT_THROUGHPUT :
349+ re .compile (r"Per GPU Output Throughput \(tps\/gpu\):\s+([\d\.]+)" ),
345350}
346351
347352AGGR_SERVER_PERF_METRIC_LOG_QUERIES = {
@@ -457,6 +462,8 @@ def import_allowed_perf_config():
457462 PerfMetricType .ENGINE_SIZE : "engine_size" ,
458463 PerfMetricType .CONTEXT_GPU_MEMORY : "context_gpu_memory" ,
459464 PerfMetricType .KV_CACHE_SIZE : "kv_cache_size" ,
465+ PerfMetricType .PER_USER_OUTPUT_THROUGHPUT : "per_user_output_throughput" ,
466+ PerfMetricType .PER_GPU_OUTPUT_THROUGHPUT : "per_gpu_output_throughput" ,
460467}
461468
462469BUILDER_METRICS = [
Original file line number Diff line number Diff line change @@ -114,6 +114,8 @@ class PerfMetricType(str, Enum):
114114 KV_CACHE_SIZE = "KV_CACHE_SIZE"
115115 DISAGG_SERVER_E2EL = "DISAGG_SERVER_E2EL"
116116 DISAGG_SERVER_TTFT = "DISAGG_SERVER_TTFT"
117+ PER_USER_OUTPUT_THROUGHPUT = "PER_USER_OUTPUT_THROUGHPUT"
118+ PER_GPU_OUTPUT_THROUGHPUT = "PER_GPU_OUTPUT_THROUGHPUT"
117119
118120
119121@contextlib .contextmanager
You can’t perform that action at this time.
0 commit comments