diff --git a/pyproject.toml b/pyproject.toml
index b7523fcf..569d3f25 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "redis-benchmarks-specification"
-version = "0.1.245"
+version = "0.1.247"
 description = "The Redis benchmarks specification describes the cross-language/tools requirements and expectations to foster performance and observability standards around redis related technologies. Members from both industry and academia, including organizations and individuals are encouraged to contribute."
 authors = ["filipecosta90 ","Redis Performance Group "]
 readme = "Readme.md"
diff --git a/redis_benchmarks_specification/__cli__/cli.py b/redis_benchmarks_specification/__cli__/cli.py
index 829cf37e..92970372 100644
--- a/redis_benchmarks_specification/__cli__/cli.py
+++ b/redis_benchmarks_specification/__cli__/cli.py
@@ -243,6 +243,7 @@ def get_commits_by_tags(args, repo):
 def get_repo(args):
     redisDirPath = args.redis_repo
     cleanUp = False
+    last_n = args.last_n
     if redisDirPath is None:
         cleanUp = True
         redisDirPath = tempfile.mkdtemp()
@@ -250,7 +251,10 @@
         logging.info(
             f"Retrieving redis repo from remote {remote_url} into {redisDirPath}. Using branch {args.branch}."
         )
-        cmd = f"git clone {remote_url} {redisDirPath} --branch {args.branch}\n"
+        depth_str = ""
+        if last_n > 0:
+            depth_str = f" --depth {last_n}"
+        cmd = f"git clone {remote_url} {redisDirPath} --branch {args.branch} {depth_str}\n"
         process = subprocess.Popen(
             "/bin/bash", stdin=subprocess.PIPE, stdout=subprocess.PIPE
         )
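With a positive --last_n, the cli.py change turns the clone into a shallow one. A minimal sketch of the resulting command, with hypothetical values standing in for remote_url, the mkdtemp() directory, branch, and last_n:

```python
# Sketch of the clone command construction above; the values are made up,
# only the string-building logic mirrors the diff.
remote_url = "https://github.com/redis/redis"  # hypothetical remote
redis_dir = "/tmp/tmpabc123"                   # stands in for tempfile.mkdtemp()
branch = "unstable"
last_n = 5

depth_str = ""
if last_n > 0:
    # --depth N makes git fetch only the N most recent commits, which is
    # enough when only the last N commits are being benchmarked.
    depth_str = f" --depth {last_n}"

cmd = f"git clone {remote_url} {redis_dir} --branch {branch} {depth_str}\n"
print(cmd)
# git clone https://github.com/redis/redis /tmp/tmpabc123 --branch unstable  --depth 5
```

The double space before --depth in the output is faithful to the diff: depth_str already carries its own leading space.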
"waterline={:.1f}%.".format(largest_variance) + # waterline = largest_variance - except redis.exceptions.ResponseError: + except redis.exceptions.ResponseError as e: + logging.error( + "Detected a redis.exceptions.ResponseError. {}".format(e.__str__()) + ) pass except ZeroDivisionError as e: logging.error("Detected a ZeroDivisionError. {}".format(e.__str__())) @@ -1198,7 +1222,7 @@ def from_rts_to_regression_table( else: # lower-better percentage_change = ( - float(baseline_v) / float(comparison_v) - 1 + -(float(baseline_v) - float(comparison_v)) / float(baseline_v) ) * 100.0 else: logging.warn( @@ -1280,16 +1304,27 @@ def from_rts_to_regression_table( logging.warning( "There were no datapoints both for baseline and comparison for test: {test_name}" ) - no_datapoints_list.append(test_name) + if test_name not in no_datapoints_list: + no_datapoints_list.append(test_name) logging.warning( f"There is a total of {len(no_datapoints_list)} tests without datapoints for baseline AND comparison" ) logging.info( f"There is a total of {len(comparison_only_list)} tests without datapoints for baseline" ) + print( + "No datapoint baseline regex={test_names_str}".format( + test_names_str="|".join(no_datapoints_baseline_list) + ) + ) logging.info( f"There is a total of {len(baseline_only_list)} tests without datapoints for comparison" ) + print( + "No datapoint comparison regex={test_names_str}".format( + test_names_str="|".join(no_datapoints_comparison_list) + ) + ) logging.info(f"There is a total of {len(unstable_list)} UNSTABLE tests") return ( detected_regressions, diff --git a/redis_benchmarks_specification/test-suites/defaults.yml b/redis_benchmarks_specification/test-suites/defaults.yml index 7de76819..f7a5ba14 100644 --- a/redis_benchmarks_specification/test-suites/defaults.yml +++ b/redis_benchmarks_specification/test-suites/defaults.yml @@ -10,15 +10,19 @@ exporter: - $."BEST RUN RESULTS".Totals."Latency" - $."BEST RUN RESULTS".Totals."Misses/sec" - $."BEST RUN RESULTS".Totals."Percentile Latencies"."p50.00" + - $."BEST RUN RESULTS".Totals."Percentile Latencies"."p99.00" - $."WORST RUN RESULTS".Totals."Ops/sec" - $."WORST RUN RESULTS".Totals."Latency" - $."WORST RUN RESULTS".Totals."Misses/sec" - $."WORST RUN RESULTS".Totals."Percentile Latencies"."p50.00" + - $."WORST RUN RESULTS".Totals."Percentile Latencies"."p99.00" - $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Ops/sec" - $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Latency" - $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Misses/sec" - - $."AGGREGATED AVERAGE RESULTS (5 runs)".Totals."Percentile Latencies"."p50.00" + - $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Percentile Latencies"."p50.00" + - $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Percentile Latencies"."p99.00" - $."ALL STATS".Totals."Ops/sec" - $."ALL STATS".Totals."Latency" - $."ALL STATS".Totals."Misses/sec" - $."ALL STATS".Totals."Percentile Latencies"."p50.00" + - $."ALL STATS".Totals."Percentile Latencies"."p99.00" diff --git a/utils/summary.py b/utils/summary.py new file mode 100644 index 00000000..cb29be49 --- /dev/null +++ b/utils/summary.py @@ -0,0 +1,199 @@ +import os +import argparse +from ruamel.yaml import YAML +import collections + +# Command groups mapping +COMMAND_GROUPS = { + "string": ["set", "get", "append", "getbit", "setrange", "bitcount", "mget"], + "hash": [ + "hset", + "hget", + "hincrby", + "hmset", + "hdel", + "hscan", + "hexists", + "hkeys", + "hvals", + "hmget", + "hsetnx", + "hgetall", + ], + "list": ["lpush", "rpop", "lpop", "lrem", 
"lrange", "lindex", "lpos", "linsert"], + "set": [ + "sadd", + "smembers", + "sismember", + "sunion", + "sdiff", + "sinter", + "smismember", + "sscan", + ], + "sorted_set": [ + "zadd", + "zrange", + "zrevrange", + "zrangebyscore", + "zrevrangebyscore", + "zincrby", + "zrem", + "zscore", + "zrank", + "zunion", + "zunionstore", + "zrevrank", + "zscan", + "zcard", + ], + "stream": ["xadd", "xread"], + "geospatial": ["geosearch", "geopos", "geohash", "geodist"], + "key_management": [ + "expire", + "pexpire", + "ttl", + "expireat", + "touch", + "del", + "exists", + ], + "pubsub": ["ping", "hello"], + "scripting": ["eval", "evalsha"], + "transaction": ["multi", "exec"], + "hyperloglog": ["pfadd"], + "server_management": ["hello"], +} + + +def parse_arguments(arguments): + """ + Parses the memtier benchmark arguments to extract relevant parameters. + Specifically extracts the --command argument. + + Args: + arguments (str): The arguments string from the YAML file. + + Returns: + dict: A dictionary containing extracted parameters. + """ + params = {} + command = None + + for arg in arguments.split(): + if arg.startswith("--command="): + command = arg.split("=", 1)[1] + elif arg == "--command": + command = arguments.split()[arguments.split().index(arg) + 1] + + return command + + +def categorize_command(command): + """ + Categorize a Redis command into a command group. + + Args: + command (str): The Redis command. + + Returns: + str: The command group. + """ + for group, commands in COMMAND_GROUPS.items(): + if command in commands: + return group + return "unknown" + + +def summarize_yaml_file(yaml_file_path, command_summary, command_group_summary): + """ + Processes a single YAML file to extract the tested commands and groups. + + Args: + yaml_file_path (str): Path to the YAML file. + command_summary (dict): Dictionary to store the command summary. + command_group_summary (dict): Dictionary to store the command group summary. + """ + yaml = YAML() + yaml.preserve_quotes = True + + try: + with open(yaml_file_path, "r") as file: + config = yaml.load(file) + except Exception as e: + print(f"Error reading {yaml_file_path}: {e}") + return + + # Extract tested commands from 'tested-commands' + tested_commands = config.get("tested-commands", []) + for command in tested_commands: + command_summary["tested_commands"][command] += 1 + command_group = categorize_command(command) + command_group_summary[command_group] += 1 + + # Extract command from 'clientconfig.arguments' + arguments = config.get("clientconfig", {}).get("arguments", "") + if arguments: + command = parse_arguments(arguments) + if command: + command_summary["client_arguments_commands"][command] += 1 + command_group = categorize_command(command) + command_group_summary[command_group] += 1 + + +def summarize_directory(directory): + """ + Summarizes the commands and command groups across all YAML files in a directory. + + Args: + directory (str): Path to the directory containing YAML files. 
+ """ + command_summary = { + "tested_commands": collections.Counter(), + "client_arguments_commands": collections.Counter(), + } + command_group_summary = collections.Counter() + + # Iterate over all YAML files in the directory + for filename in os.listdir(directory): + if filename.endswith(".yml") or filename.endswith(".yaml"): + yaml_file_path = os.path.join(directory, filename) + summarize_yaml_file(yaml_file_path, command_summary, command_group_summary) + + # Print summary + print("\nTested Commands Summary:") + for command, count in command_summary["tested_commands"].items(): + print(f"{command}: {count} occurrences") + + print("\nClient Arguments Commands Summary:") + for command, count in command_summary["client_arguments_commands"].items(): + print(f"{command}: {count} occurrences") + + print("\nCommand Group Summary:") + for group, count in command_group_summary.items(): + print(f"{group.capitalize()}: {count} occurrences") + + +def main(): + parser = argparse.ArgumentParser( + description="Summarize commands and command groups from YAML benchmark files." + ) + parser.add_argument( + "--directory", + type=str, + default="../redis_benchmarks_specification/test-suites/", + help="Path to the directory containing YAML test files.", + ) + + args = parser.parse_args() + directory = args.directory + + if not os.path.isdir(directory): + print(f"Directory {directory} does not exist.") + return + + summarize_directory(directory) + + +if __name__ == "__main__": + main()