Skip to content

Commit 5dea124

Browse files
committed
update
Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>
1 parent 4c6d04a commit 5dea124

File tree

6 files changed: +64 additions, -145 deletions (lines changed)

jenkins/runPerfSanityTriage.groovy

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,4 +110,3 @@ pipeline {
110110
} // stage Run Perf Sanity Script
111111
} // stages
112112
} // pipeline
113-

jenkins/scripts/open_search_db.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -257,7 +257,9 @@ def queryFromOpenSearchDB(json_data, project) -> dict:
257257
OpenSearchDB.logger.info(
258258
f"project {project} is not in read access project list: {json.dumps(READ_ACCESS_PROJECT_NAME)}"
259259
)
260-
print(f"project {project} is not in read access project list: {json.dumps(READ_ACCESS_PROJECT_NAME)}")
260+
print(
261+
f"project {project} is not in read access project list: {json.dumps(READ_ACCESS_PROJECT_NAME)}"
262+
)
261263
return None
262264
if not isinstance(json_data, str):
263265
json_data_dump = json.dumps(json_data)
@@ -279,12 +281,16 @@ def queryFromOpenSearchDB(json_data, project) -> dict:
279281
OpenSearchDB.logger.info(
280282
f"OpenSearchDB query failed, will retry, error:{res.status_code} {res.text}"
281283
)
282-
print(f"OpenSearchDB query failed, will retry, error:{res.status_code} {res.text}")
284+
print(
285+
f"OpenSearchDB query failed, will retry, error:{res.status_code} {res.text}"
286+
)
283287
retry_time -= 1
284288
OpenSearchDB.logger.info(
285289
f"Fail to queryFromOpenSearchDB after {retry_time} retry: {url}, json: {json_data_dump}, error: {res.text}"
286290
)
287-
print(f"Fail to queryFromOpenSearchDB after {retry_time} retry: {url}, json: {json_data_dump}, error: {res.text}")
291+
print(
292+
f"Fail to queryFromOpenSearchDB after {retry_time} retry: {url}, json: {json_data_dump}, error: {res.text}"
293+
)
288294
return None
289295

290296
@staticmethod

jenkins/scripts/perf/perf_sanity_triage.py

Lines changed: 38 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -14,45 +14,31 @@
1414
QUERY_LOOKBACK_DAYS = 7
1515
MAX_QUERY_SIZE = 3000
1616

17+
1718
def query_regression_data(project_name):
18-
"""
19-
Query regression data from OpenSearch database.
20-
Returns data where b_is_regression is True.
21-
"""
19+
"""Query regression data from OpenSearch database."""
2220
last_days = QUERY_LOOKBACK_DAYS
2321

2422
must_clauses = [
25-
{
26-
"term": {
27-
"b_is_valid": True
28-
}
29-
},
30-
{
31-
"term": {
32-
"b_is_post_merge": True
33-
}
34-
},
35-
{
36-
"term": {
37-
"b_is_regression": True
38-
}
39-
},
23+
{"term": {"b_is_valid": True}},
24+
{"term": {"b_is_post_merge": True}},
25+
{"term": {"b_is_regression": True}},
4026
{
4127
"range": {
4228
"ts_created": {
43-
"gte":
44-
int(time.time() - 24 * 3600 * last_days) // (24 * 3600) *
45-
24 * 3600 * 1000,
29+
"gte": int(time.time() - 24 * 3600 * last_days)
30+
// (24 * 3600)
31+
* 24
32+
* 3600
33+
* 1000,
4634
}
4735
}
4836
},
4937
]
5038

5139
json_data = {
5240
"query": {
53-
"bool": {
54-
"must": must_clauses
55-
},
41+
"bool": {"must": must_clauses},
5642
},
5743
"size": MAX_QUERY_SIZE,
5844
}
@@ -62,38 +48,30 @@ def query_regression_data(project_name):
6248
try:
6349
res = OpenSearchDB.queryFromOpenSearchDB(json_data, project_name)
6450
if res is None:
65-
print(
66-
f"Failed to query from {project_name}, returned no response")
51+
print(f"Failed to query from {project_name}, returned no response")
6752
return None
6853
else:
6954
payload = res.json().get("hits", {}).get("hits", [])
7055
if len(payload) == 0:
71-
print(
72-
f"No regression data found in {project_name}, returned empty list"
73-
)
56+
print(f"No regression data found in {project_name}, returned empty list")
7457
return []
7558
for hit in payload:
7659
data_dict = hit.get("_source", {})
7760
data_dict["_id"] = hit.get("_id", "")
7861
if data_dict["_id"] == "":
79-
print(
80-
f"Failed to query from {project_name}, returned data with no _id"
81-
)
62+
print(f"Failed to query from {project_name}, returned data with no _id")
8263
return None
8364
data_list.append(data_dict)
84-
print(
85-
f"Successfully queried from {project_name}, queried {len(data_list)} entries"
86-
)
65+
print(f"Successfully queried from {project_name}, queried {len(data_list)} entries")
8766
return data_list
8867
except Exception as e:
8968
print(f"Failed to query from {project_name}, returned error: {e}")
9069
return None
9170

9271

9372
def get_regression_data_by_job_id(data_list, query_job_number):
94-
"""
95-
Categorize regression data by s_job_id.
96-
Returns a dict with job_id as key and list of regression data as value.
73+
"""Returns a dict with job_id as key and list of regression data as value.
74+
9775
Only returns the latest query_job_number jobs.
9876
"""
9977
if data_list is None or len(data_list) == 0:
@@ -111,14 +89,10 @@ def get_regression_data_by_job_id(data_list, query_job_number):
11189

11290
# Sort job_ids by the latest ts_created in each group (descending)
11391
def get_latest_timestamp(job_id):
114-
timestamps = [
115-
d.get("ts_created", 0) for d in job_data_dict[job_id]
116-
]
92+
timestamps = [d.get("ts_created", 0) for d in job_data_dict[job_id]]
11793
return max(timestamps) if timestamps else 0
11894

119-
sorted_job_ids = sorted(job_data_dict.keys(),
120-
key=get_latest_timestamp,
121-
reverse=True)
95+
sorted_job_ids = sorted(job_data_dict.keys(), key=get_latest_timestamp, reverse=True)
12296

12397
# Only keep the latest query_job_number jobs
12498
latest_job_ids = sorted_job_ids[:query_job_number]
@@ -131,8 +105,8 @@ def get_latest_timestamp(job_id):
131105

132106

133107
def send_regression_message(regression_dict, channel_id, bot_token):
134-
"""
135-
Organize regression data into message format and send to Slack channel(s).
108+
"""Organize regression data into message format and send to Slack channel(s).
109+
136110
channel_id can be a single ID or multiple IDs separated by commas.
137111
"""
138112
if not regression_dict:
@@ -146,8 +120,7 @@ def send_regression_message(regression_dict, channel_id, bot_token):
146120
job_header = f"*LLM/main/L0_PostMerge/{job_id}:*\n"
147121
msg_parts.append(job_header)
148122

149-
sorted_data_list = sorted(data_list,
150-
key=lambda x: x.get("s_test_case_name", ""))
123+
sorted_data_list = sorted(data_list, key=lambda x: x.get("s_test_case_name", ""))
151124
for idx, data in enumerate(sorted_data_list, start=1):
152125
test_case_name = data.get("s_test_case_name", "N/A")
153126
regression_info = data.get("s_regression_info", "N/A")
@@ -169,9 +142,7 @@ def send_regression_message(regression_dict, channel_id, bot_token):
169142

170143

171144
def send_message(msg, channel_id, bot_token):
172-
"""
173-
Send message to Slack channel using slack_sdk.
174-
"""
145+
"""Send message to Slack channel using slack_sdk."""
175146
try:
176147
client = WebClient(token=bot_token)
177148

@@ -196,28 +167,19 @@ def send_message(msg, channel_id, bot_token):
196167

197168

198169
def main():
199-
parser = argparse.ArgumentParser(
200-
description="Perf Sanity Triage Script")
201-
parser.add_argument("--project_name",
202-
type=str,
203-
required=True,
204-
help="OpenSearch project name")
205-
parser.add_argument("--operation",
206-
type=str,
207-
required=True,
208-
help="Operation to perform")
209-
parser.add_argument("--channel_id",
210-
type=str,
211-
default="",
212-
help="Slack channel ID(s), comma-separated for multiple channels")
213-
parser.add_argument("--bot_token",
214-
type=str,
215-
default="",
216-
help="Slack bot token")
217-
parser.add_argument("--query_job_number",
218-
type=int,
219-
default=1,
220-
help="Number of latest jobs to query")
170+
parser = argparse.ArgumentParser(description="Perf Sanity Triage Script")
171+
parser.add_argument("--project_name", type=str, required=True, help="OpenSearch project name")
172+
parser.add_argument("--operation", type=str, required=True, help="Operation to perform")
173+
parser.add_argument(
174+
"--channel_id",
175+
type=str,
176+
default="",
177+
help="Slack channel ID(s), comma-separated for multiple channels",
178+
)
179+
parser.add_argument("--bot_token", type=str, default="", help="Slack bot token")
180+
parser.add_argument(
181+
"--query_job_number", type=int, default=1, help="Number of latest jobs to query"
182+
)
221183

222184
args = parser.parse_args()
223185

@@ -233,11 +195,9 @@ def main():
233195
print("Failed to query regression data")
234196
return
235197

236-
regression_dict = get_regression_data_by_job_id(data_list,
237-
args.query_job_number)
198+
regression_dict = get_regression_data_by_job_id(data_list, args.query_job_number)
238199

239-
send_regression_message(regression_dict, args.channel_id,
240-
args.bot_token)
200+
send_regression_message(regression_dict, args.channel_id, args.bot_token)
241201
else:
242202
print(f"Unknown operation: {args.operation}")
243203

tests/integration/defs/perf/open_search_db_utils.py

Lines changed: 8 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -643,65 +643,19 @@ def _get_metric_keys():
643643
return metric_keys
644644

645645

646-
def _print_perf_data(data):
647-
"""Print performance metrics and config for a single data entry."""
648-
print_info("=== Metrics ===")
649-
for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS:
650-
if metric in data:
651-
value = data.get(metric, "N/A")
652-
print_info(f'"{metric}": {value}')
653-
654-
metric_keys = _get_metric_keys()
655-
print_info("\n=== Config ===")
656-
config_keys = sorted([key for key in data.keys() if key not in metric_keys])
657-
for key in config_keys:
658-
value = data[key]
659-
print_info(f'"{key}": {value}')
660-
661-
662646
def _print_regression_data(data, print_func=None):
663647
"""
664-
Print regression info, metrics with baselines/thresholds, and config.
648+
Print regression info and config.
665649
"""
666650
if print_func is None:
667651
print_func = print_info
668652

669653
if "s_regression_info" in data:
670654
print_func("=== Regression Info ===")
671-
print_func(f"{data['s_regression_info']}")
655+
for item in data["s_regression_info"].split(","):
656+
print_func(item.strip())
672657

673658
metric_keys = _get_metric_keys()
674-
is_post_merge = data.get("b_is_post_merge", False)
675-
676-
print_func("=== Metrics ===")
677-
for metric in MAXIMIZE_METRICS + MINIMIZE_METRICS:
678-
metric_suffix = metric[2:] # Strip "d_" prefix
679-
baseline_key = f"d_baseline_{metric_suffix}"
680-
if is_post_merge:
681-
threshold_key = f"d_threshold_post_merge_{metric_suffix}"
682-
else:
683-
threshold_key = f"d_threshold_pre_merge_{metric_suffix}"
684-
# Only print if at least one of the keys exists
685-
if metric in data or baseline_key in data or threshold_key in data:
686-
value = data.get(metric, "N/A")
687-
baseline = data.get(baseline_key, "N/A")
688-
threshold = data.get(threshold_key, "N/A")
689-
# Calculate percentage difference between value and baseline
690-
# Positive percentage means better perf, negative means regression
691-
if (isinstance(value, (int, float))
692-
and isinstance(baseline, (int, float)) and baseline != 0):
693-
if metric in MAXIMIZE_METRICS:
694-
# Larger is better: value > baseline is positive (better)
695-
percentage = (value - baseline) / baseline * 100
696-
else:
697-
# Smaller is better: value < baseline is positive (better)
698-
percentage = (baseline - value) / baseline * 100
699-
percentage_str = f"{percentage:+.2f}%"
700-
else:
701-
percentage_str = "N/A"
702-
print_func(
703-
f'"{metric}": {value}, "{baseline_key}": {baseline}, '
704-
f'"{threshold_key}": {threshold}, "diff": {percentage_str}')
705659

706660
print_func("\n=== Config ===")
707661
config_keys = sorted([key for key in data.keys() if key not in metric_keys])
@@ -721,7 +675,6 @@ def check_perf_regression(new_data_dict):
721675
data for data in new_data_dict.values()
722676
if data.get("b_is_regression", False)
723677
]
724-
725678
# Split regression data into post-merge and pre-merge
726679
post_merge_regressions = [
727680
data for data in regressive_data_list
@@ -744,15 +697,16 @@ def check_perf_regression(new_data_dict):
744697

745698
# Print post-merge regression data with print_error
746699
if len(post_merge_regressions) > 0:
700+
print_error(
701+
f"Found {len(post_merge_regressions)} post-merge regression data")
747702
for i, data in enumerate(post_merge_regressions):
748703
print_error(f"\n{'=' * 60}")
749704
print_error(f"Post-merge Regression Data #{i + 1}")
750705
print_error("=" * 60)
751706
_print_regression_data(data, print_func=print_error)
752-
print_error(
753-
f"Found {len(post_merge_regressions)} post-merge regression data")
754-
raise RuntimeError(
755-
f"Found {len(post_merge_regressions)} post-merge regression data")
707+
# Regression will not fail the test.
708+
# raise RuntimeError(
709+
# f"Found {len(post_merge_regressions)} post-merge regression data")
756710

757711
# Print summary if no regressions
758712
if len(regressive_data_list) == 0:

tests/integration/defs/perf/test_perf_sanity.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1176,9 +1176,7 @@ def get_perf_result(self, outputs: Dict[int, List[str]]):
11761176
"""Parse performance results from outputs."""
11771177

11781178
def parse_metrics_from_output(output: str) -> Dict[str, float] | None:
1179-
"""Parse all metrics from a single output string.
1180-
1181-
"""
1179+
"""Parse all metrics from a single output string."""
11821180
metrics = {}
11831181
lines = output.split("\n")
11841182
for metric_type, regex in PERF_METRIC_LOG_QUERIES.items():
@@ -1389,16 +1387,19 @@ def check_test_failure(self):
13891387
failed_servers = []
13901388
for server_idx, client_configs in self.server_client_configs.items():
13911389
server_perf_results = self._perf_results.get(server_idx, [])
1392-
if (
1393-
len(server_perf_results) != len(client_configs)
1394-
or any(metrics is None for metrics in server_perf_results)
1390+
if len(server_perf_results) != len(client_configs) or any(
1391+
metrics is None for metrics in server_perf_results
13951392
):
13961393
failed_servers.append(server_idx)
13971394

13981395
if failed_servers:
13991396
for server_idx in failed_servers:
1400-
print_error(f"Server {server_idx} failed: perf results count mismatch or incomplete metrics")
1401-
raise Exception(f"Test failed: servers {failed_servers} did not produce expected results")
1397+
print_error(
1398+
f"Server {server_idx} failed: perf results count mismatch or incomplete metrics"
1399+
)
1400+
raise Exception(
1401+
f"Test failed: servers {failed_servers} did not produce expected results"
1402+
)
14021403

14031404
print_info("All servers passed")
14041405

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -497,7 +497,6 @@ disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backen
497497
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/5769890)
498498
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5769890,https://nvbugs/5748683)
499499
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_pp4_mtp] SKIP (https://nvbugs/5779536)
500-
perf/test_perf_sanity.py::test_e2e[disagg_upload-deepseek-r1-fp4_1k1k_ctx1_gen1_dep8_bs768_eplb0_mtp0_ccb-UCX] SKIP (https://nvbugs/5778381)
501500
unittest/_torch/attention/test_flashinfer_star_attn.py::TestStarAttention::test_flashinfer_star_attention[num_layers:2-num_heads:32-num_kv_heads:8-head_dim:64-anchor_size:64-block_size:64-dtype:torch.float16] SKIP (https://nvbugs/5781389)
502501
unittest/_torch/ray_orchestrator/multi_gpu/test_ops.py::test_reducescatter_pg_op[var_len:True-seqlen:16-hidden:128] SKIP (https://nvbugs/5781383)
503502
cpp/test_e2e.py::test_model[-mamba-86] SKIP (https://nvbugs/5781665)

0 commit comments

Comments
 (0)