Skip to content

Commit edf7329

Browse files
Extending the compare and runner tools
1 parent af9cc72 commit edf7329

File tree

7 files changed

+387
-102
lines changed

7 files changed

+387
-102
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "redis-benchmarks-specification"
3-
version = "0.1.277"
3+
version = "0.1.281"
44
description = "The Redis benchmarks specification describes the cross-language/tools requirements and expectations to foster performance and observability standards around redis related technologies. Members from both industry and academia, including organizations and individuals are encouraged to contribute."
55
authors = ["filipecosta90 <[email protected]>","Redis Performance Group <[email protected]>"]
66
readme = "Readme.md"

redis_benchmarks_specification/__compare__/args.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,18 @@ def create_compare_arguments(parser):
5757
parser.add_argument("--baseline_github_org", type=str, default="")
5858
parser.add_argument("--comparison_github_org", type=str, default="")
5959
parser.add_argument("--triggering_env", type=str, default="ci")
60+
parser.add_argument(
61+
"--triggering_env_baseline",
62+
type=str,
63+
default=None,
64+
help="Triggering environment for baseline data. If not specified, falls back to --triggering_env"
65+
)
66+
parser.add_argument(
67+
"--triggering_env_comparison",
68+
type=str,
69+
default=None,
70+
help="Triggering environment for comparison data. If not specified, falls back to --triggering_env"
71+
)
6072
parser.add_argument("--github_token", type=str, default=PERFORMANCE_GH_TOKEN)
6173
parser.add_argument("--pull-request", type=str, default=None, nargs="?", const="")
6274
parser.add_argument("--deployment_name", type=str, default="oss-standalone")
@@ -67,6 +79,18 @@ def create_compare_arguments(parser):
6779
parser.add_argument(
6880
"--running_platform", type=str, default="intel64-ubuntu22.04-redis-icx1"
6981
)
82+
parser.add_argument(
83+
"--running_platform_baseline",
84+
type=str,
85+
default=None,
86+
help="Platform for baseline data. If not specified, falls back to --running_platform"
87+
)
88+
parser.add_argument(
89+
"--running_platform_comparison",
90+
type=str,
91+
default=None,
92+
help="Platform for comparison data. If not specified, falls back to --running_platform"
93+
)
7094
parser.add_argument("--extra-filter", type=str, default=None)
7195
parser.add_argument(
7296
"--last_n",

redis_benchmarks_specification/__compare__/compare.py

Lines changed: 153 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,14 @@ def compare_command_logic(args, project_name, project_version):
251251
testname_regex = args.testname_regex
252252
auto_approve = args.auto_approve
253253
running_platform = args.running_platform
254+
255+
# Handle separate baseline and comparison platform/environment arguments
256+
# Fall back to general arguments if specific ones are not provided
257+
running_platform_baseline = args.running_platform_baseline or args.running_platform
258+
running_platform_comparison = args.running_platform_comparison or args.running_platform
259+
triggering_env_baseline = args.triggering_env_baseline or args.triggering_env
260+
triggering_env_comparison = args.triggering_env_comparison or args.triggering_env
261+
254262
baseline_target_version = args.baseline_target_version
255263
comparison_target_version = args.comparison_target_version
256264
baseline_target_branch = args.baseline_target_branch
@@ -262,10 +270,31 @@ def compare_command_logic(args, project_name, project_version):
262270
baseline_hash = args.baseline_hash
263271
comparison_hash = args.comparison_hash
264272

265-
if running_platform is not None:
273+
# Log platform and environment information
274+
if running_platform_baseline == running_platform_comparison:
275+
if running_platform_baseline is not None:
276+
logging.info(
277+
"Using platform named: {} for both baseline and comparison.\n\n".format(
278+
running_platform_baseline
279+
)
280+
)
281+
else:
282+
logging.info(
283+
"Using platform named: {} for baseline and {} for comparison.\n\n".format(
284+
running_platform_baseline, running_platform_comparison
285+
)
286+
)
287+
288+
if triggering_env_baseline == triggering_env_comparison:
289+
logging.info(
290+
"Using triggering environment: {} for both baseline and comparison.".format(
291+
triggering_env_baseline
292+
)
293+
)
294+
else:
266295
logging.info(
267-
"Using platform named: {} to do the comparison.\n\n".format(
268-
running_platform
296+
"Using triggering environment: {} for baseline and {} for comparison.".format(
297+
triggering_env_baseline, triggering_env_comparison
269298
)
270299
)
271300

@@ -328,7 +357,8 @@ def compare_command_logic(args, project_name, project_version):
328357
rts,
329358
tf_github_org,
330359
tf_github_repo,
331-
tf_triggering_env,
360+
triggering_env_baseline,
361+
triggering_env_comparison,
332362
metric_name,
333363
comparison_branch,
334364
baseline_branch,
@@ -352,7 +382,8 @@ def compare_command_logic(args, project_name, project_version):
352382
to_date,
353383
to_ts_ms,
354384
use_metric_context_path,
355-
running_platform,
385+
running_platform_baseline,
386+
running_platform_comparison,
356387
baseline_target_version,
357388
comparison_target_version,
358389
baseline_hash,
@@ -383,11 +414,13 @@ def compare_command_logic(args, project_name, project_version):
383414
pr_link,
384415
regression_comment,
385416
rts,
386-
running_platform,
417+
running_platform_baseline,
418+
running_platform_comparison,
387419
table_output,
388420
tf_github_org,
389421
tf_github_repo,
390-
tf_triggering_env,
422+
triggering_env_baseline,
423+
triggering_env_comparison,
391424
total_comparison_points,
392425
total_improvements,
393426
total_regressions,
@@ -423,11 +456,13 @@ def prepare_regression_comment(
423456
pr_link,
424457
regression_comment,
425458
rts,
426-
running_platform,
459+
running_platform_baseline,
460+
running_platform_comparison,
427461
table_output,
428462
tf_github_org,
429463
tf_github_repo,
430-
tf_triggering_env,
464+
triggering_env_baseline,
465+
triggering_env_comparison,
431466
total_comparison_points,
432467
total_improvements,
433468
total_regressions,
@@ -441,9 +476,25 @@ def prepare_regression_comment(
441476
if total_comparison_points > 0:
442477
comment_body = "### Automated performance analysis summary\n\n"
443478
comment_body += "This comment was automatically generated given there is performance data available.\n\n"
444-
if running_platform is not None:
445-
comment_body += "Using platform named: {} to do the comparison.\n\n".format(
446-
running_platform
479+
# Add platform information to comment
480+
if running_platform_baseline == running_platform_comparison:
481+
if running_platform_baseline is not None:
482+
comment_body += "Using platform named: {} for both baseline and comparison.\n\n".format(
483+
running_platform_baseline
484+
)
485+
else:
486+
comment_body += "Using platform named: {} for baseline and {} for comparison.\n\n".format(
487+
running_platform_baseline, running_platform_comparison
488+
)
489+
490+
# Add triggering environment information to comment
491+
if triggering_env_baseline == triggering_env_comparison:
492+
comment_body += "Using triggering environment: {} for both baseline and comparison.\n\n".format(
493+
triggering_env_baseline
494+
)
495+
else:
496+
comment_body += "Using triggering environment: {} for baseline and {} for comparison.\n\n".format(
497+
triggering_env_baseline, triggering_env_comparison
447498
)
448499
comparison_summary = "In summary:\n"
449500
if total_stable > 0:
@@ -507,7 +558,7 @@ def prepare_regression_comment(
507558

508559
if is_actionable_pr:
509560
zset_project_pull_request = get_project_compare_zsets(
510-
tf_triggering_env,
561+
triggering_env_baseline,
511562
tf_github_org,
512563
tf_github_repo,
513564
)
@@ -516,16 +567,22 @@ def prepare_regression_comment(
516567
zset_project_pull_request, comparison_branch
517568
)
518569
)
519-
_, start_time_ms, _ = get_start_time_vars()
520-
res = rts.zadd(
521-
zset_project_pull_request,
522-
{comparison_branch: start_time_ms},
523-
)
524-
logging.info(
525-
"Result of Populating the pull request performance ZSETs: {} with branch {}: {}".format(
526-
zset_project_pull_request, comparison_branch, res
570+
# Only add to Redis sorted set if comparison_branch is not None
571+
if comparison_branch is not None:
572+
_, start_time_ms, _ = get_start_time_vars()
573+
res = rts.zadd(
574+
zset_project_pull_request,
575+
{comparison_branch: start_time_ms},
576+
)
577+
logging.info(
578+
"Result of Populating the pull request performance ZSETs: {} with branch {}: {}".format(
579+
zset_project_pull_request, comparison_branch, res
580+
)
581+
)
582+
else:
583+
logging.warning(
584+
"Skipping Redis ZADD operation because comparison_branch is None"
527585
)
528-
)
529586

530587
if contains_regression_comment:
531588
update_comment_if_needed(
@@ -587,7 +644,8 @@ def compute_regression_table(
587644
rts,
588645
tf_github_org,
589646
tf_github_repo,
590-
tf_triggering_env,
647+
tf_triggering_env_baseline,
648+
tf_triggering_env_comparison,
591649
metric_name,
592650
comparison_branch,
593651
baseline_branch="unstable",
@@ -611,7 +669,8 @@ def compute_regression_table(
611669
to_date=None,
612670
to_ts_ms=None,
613671
use_metric_context_path=None,
614-
running_platform=None,
672+
running_platform_baseline=None,
673+
running_platform_comparison=None,
615674
baseline_target_version=None,
616675
comparison_target_version=None,
617676
comparison_hash=None,
@@ -672,7 +731,7 @@ def compute_regression_table(
672731
_,
673732
_,
674733
_,
675-
) = get_overall_dashboard_keynames(tf_github_org, tf_github_repo, tf_triggering_env)
734+
) = get_overall_dashboard_keynames(tf_github_org, tf_github_repo, tf_triggering_env_baseline)
676735
test_names = []
677736
used_key = testcases_setname
678737
test_filter = "test_name"
@@ -728,9 +787,11 @@ def compute_regression_table(
728787
simplify_table,
729788
test_filter,
730789
test_names,
731-
tf_triggering_env,
790+
tf_triggering_env_baseline,
791+
tf_triggering_env_comparison,
732792
verbose,
733-
running_platform,
793+
running_platform_baseline,
794+
running_platform_comparison,
734795
baseline_github_repo,
735796
comparison_github_repo,
736797
baseline_github_org,
@@ -1047,9 +1108,11 @@ def from_rts_to_regression_table(
10471108
simplify_table,
10481109
test_filter,
10491110
test_names,
1050-
tf_triggering_env,
1111+
tf_triggering_env_baseline,
1112+
tf_triggering_env_comparison,
10511113
verbose,
1052-
running_platform=None,
1114+
running_platform_baseline=None,
1115+
running_platform_comparison=None,
10531116
baseline_github_repo="redis",
10541117
comparison_github_repo="redis",
10551118
baseline_github_org="redis",
@@ -1109,28 +1172,28 @@ def from_rts_to_regression_table(
11091172
"{}={}".format(test_filter, test_name),
11101173
"deployment_name={}".format(baseline_deployment_name),
11111174
"github_repo={}".format(baseline_github_repo),
1112-
"triggering_env={}".format(tf_triggering_env),
1175+
"triggering_env={}".format(tf_triggering_env_baseline),
11131176
]
11141177
if baseline_github_org != "":
11151178
filters_baseline.append(f"github_org={baseline_github_org}")
1116-
if running_platform is not None:
1117-
filters_baseline.append("running_platform={}".format(running_platform))
1179+
if running_platform_baseline is not None:
1180+
filters_baseline.append("running_platform={}".format(running_platform_baseline))
11181181
filters_comparison = [
11191182
"{}={}".format(by_str_comparison, comparison_str),
11201183
"metric={}".format(metric_name),
11211184
"{}={}".format(test_filter, test_name),
11221185
"deployment_name={}".format(comparison_deployment_name),
11231186
"github_repo={}".format(comparison_github_repo),
1124-
"triggering_env={}".format(tf_triggering_env),
1187+
"triggering_env={}".format(tf_triggering_env_comparison),
11251188
]
11261189
if comparison_github_org != "":
11271190
filters_comparison.append(f"github_org={comparison_github_org}")
11281191
if "hash" not in by_str_baseline:
11291192
filters_baseline.append("hash==")
11301193
if "hash" not in by_str_comparison:
11311194
filters_comparison.append("hash==")
1132-
if running_platform is not None:
1133-
filters_comparison.append("running_platform={}".format(running_platform))
1195+
if running_platform_comparison is not None:
1196+
filters_comparison.append("running_platform={}".format(running_platform_comparison))
11341197
baseline_timeseries = rts.ts().queryindex(filters_baseline)
11351198
comparison_timeseries = rts.ts().queryindex(filters_comparison)
11361199

@@ -1302,30 +1365,61 @@ def from_rts_to_regression_table(
13021365
if baseline_v != "N/A" or comparison_v != "N/A":
13031366
detected_regression = False
13041367
detected_improvement = False
1305-
if percentage_change < 0.0:
1306-
if -waterline >= percentage_change:
1307-
detected_regression = True
1308-
total_regressions = total_regressions + 1
1309-
note = note + f" {regression_str}"
1310-
detected_regressions.append(test_name)
1311-
elif percentage_change < -noise_waterline:
1312-
if simplify_table is False:
1313-
note = note + f" potential {regression_str}"
1314-
else:
1315-
if simplify_table is False:
1316-
note = note + " No Change"
1317-
1318-
if percentage_change > 0.0:
1319-
if percentage_change > waterline:
1320-
detected_improvement = True
1321-
total_improvements = total_improvements + 1
1322-
note = note + f" {improvement_str}"
1323-
elif percentage_change > noise_waterline:
1324-
if simplify_table is False:
1325-
note = note + f" potential {improvement_str}"
1326-
else:
1327-
if simplify_table is False:
1328-
note = note + " No Change"
1368+
1369+
# For higher-better metrics: negative change = regression, positive change = improvement
1370+
# For lower-better metrics: positive change = regression, negative change = improvement
1371+
if metric_mode == "higher-better":
1372+
# Higher is better: negative change is bad (regression), positive change is good (improvement)
1373+
if percentage_change < 0.0:
1374+
if -waterline >= percentage_change:
1375+
detected_regression = True
1376+
total_regressions = total_regressions + 1
1377+
note = note + f" {regression_str}"
1378+
detected_regressions.append(test_name)
1379+
elif percentage_change < -noise_waterline:
1380+
if simplify_table is False:
1381+
note = note + f" potential {regression_str}"
1382+
else:
1383+
if simplify_table is False:
1384+
note = note + " No Change"
1385+
1386+
if percentage_change > 0.0:
1387+
if percentage_change > waterline:
1388+
detected_improvement = True
1389+
total_improvements = total_improvements + 1
1390+
note = note + f" {improvement_str}"
1391+
elif percentage_change > noise_waterline:
1392+
if simplify_table is False:
1393+
note = note + f" potential {improvement_str}"
1394+
else:
1395+
if simplify_table is False:
1396+
note = note + " No Change"
1397+
else:
1398+
# Lower is better: positive change is bad (regression), negative change is good (improvement)
1399+
if percentage_change > 0.0:
1400+
if percentage_change >= waterline:
1401+
detected_regression = True
1402+
total_regressions = total_regressions + 1
1403+
note = note + f" {regression_str}"
1404+
detected_regressions.append(test_name)
1405+
elif percentage_change > noise_waterline:
1406+
if simplify_table is False:
1407+
note = note + f" potential {regression_str}"
1408+
else:
1409+
if simplify_table is False:
1410+
note = note + " No Change"
1411+
1412+
if percentage_change < 0.0:
1413+
if -percentage_change > waterline:
1414+
detected_improvement = True
1415+
total_improvements = total_improvements + 1
1416+
note = note + f" {improvement_str}"
1417+
elif -percentage_change > noise_waterline:
1418+
if simplify_table is False:
1419+
note = note + f" potential {improvement_str}"
1420+
else:
1421+
if simplify_table is False:
1422+
note = note + " No Change"
13291423

13301424
for test_group in tested_groups:
13311425
if test_group not in group_change:

0 commit comments

Comments
 (0)