diff --git a/commands-priority.json b/commands-priority.json index 0ddbb7bc..4e7b2e0f 100644 --- a/commands-priority.json +++ b/commands-priority.json @@ -1,155 +1,154 @@ { "get": 1, - "sismember": 2, - "hget": 3, - "mget": 4, - "evalsha": 5, - "expire": 6, - "getbit": 7, - "hgetall": 8, - "hmget": 9, - "hdel": 10, - "hmset": 11, - "set": 12, - "hset": 13, - "exists": 14, + "hget": 2, + "evalsha": 3, + "hgetall": 4, + "mget": 5, + "set": 6, + "expire": 7, + "hdel": 8, + "hmset": 9, + "hmget": 10, + "hset": 11, + "del": 12, + "exists": 13, + "ping": 14, "multi": 15, "exec": 16, - "smembers": 17, - "zadd": 18, - "del": 19, + "zadd": 17, + "hincrbyfloat": 18, + "xreadgroup": 19, "hincrby": 20, - "zscore": 21, - "setex": 22, - "sadd": 23, - "publish": 24, - "hincrbyfloat": 25, - "zrangebyscore": 26, - "zrange": 27, - "incrbyfloat": 28, - "time": 29, - "incr": 30, - "ping": 31, - "lrange": 32, - "zcard": 33, - "hello": 34, - "pexpire": 35, - "ttl": 36, - "rpoplpush": 37, - "llen": 38, - "eval": 39, - "rpush": 40, - "zrem": 41, - "hexists": 42, - "srem": 43, - "psetex": 44, - "incrby": 45, - "zremrangebyscore": 46, - "xadd": 47, - "expireat": 48, - "pfadd": 49, - "touch": 50, - "zrevrank": 51, - "scard": 52, - "lpop": 53, - "hlen": 54, - "getset": 55, - "xreadgroup": 56, - "spop": 57, - "randomkey": 58, - "scan": 59, - "lpush": 60, - "getex": 61, - "xrange": 62, - "zincrby": 63, - "rpop": 64, - "zcount": 65, - "zrank": 66, - "replconf": 67, - "pexpireat": 68, + "incrbyfloat": 21, + "zrangebyscore": 22, + "setex": 23, + "zscore": 24, + "smembers": 25, + "publish": 26, + "hello": 27, + "touch": 28, + "incr": 29, + "eval": 30, + "zrange": 31, + "rpoplpush": 32, + "incrby": 33, + "rpush": 34, + "llen": 35, + "sadd": 36, + "zcard": 37, + "ttl": 38, + "pexpire": 39, + "hexists": 40, + "zremrangebyscore": 41, + "lrange": 42, + "sismember": 43, + "zrem": 44, + "brpop": 45, + "xadd": 46, + "getset": 47, + "hlen": 48, + "lpop": 49, + "replconf": 50, + "hscan": 51, + "psetex": 52, + "spop": 53, + "time": 54, + "pexpireat": 55, + "pttl": 56, + "lpush": 57, + "lrem": 58, + "zpopmin": 59, + "zrevrange": 60, + "rpop": 61, + "expireat": 62, + "xread": 63, + "scan": 64, + "zrevrank": 65, + "scard": 66, + "srem": 67, + "zrevrangebyscore": 68, "xrevrange": 69, - "lrem": 70, - "brpop": 71, - "xread": 72, - "pttl": 73, - "zrevrange": 74, - "zpopmin": 75, - "zrevrangebyscore": 76, - "ltrim": 77, - "hscan": 78, - "xpending": 79, - "smismember": 80, - "lmove": 81, - "hvals": 82, - "xack": 83, - "hsetnx": 84, - "unlink": 85, - "brpoplpush": 86, - "select": 87, - "setnx": 88, - "mset": 89, - "type": 90, - "lindex": 91, - "decr": 92, - "hkeys": 93, - "zremrangebyrank": 94, - "setbit": 95, - "xdel": 96, - "georadius_ro": 97, - "getdel": 98, - "blpop": 99, - "persist": 100, - "rpushx": 101, - "echo": 102, - "blmove": 103, - "discard": 104, - "getrange": 105, - "sinterstore": 106, - "bzpopmin": 107, - "decrby": 108, - "xlen": 109, - "xtrim": 110, - "zscan": 111, - "pfcount": 112, - "client": 113, - "sscan": 114, - "restore": 115, - "srandmember": 116, - "rename": 117, - "pubsub": 118, - "fcall_ro": 119, - "geoadd": 120, - "renamenx": 121, - "bitfield": 122, - "xclaim": 123, - "smove": 124, - "xautoclaim": 125, - "lset": 126, - "append": 127, - "zrangebylex": 128, - "dump": 129, - "dbsize": 130, - "zpopmax": 131, - "lpos": 132, - "zunion": 133, - "sinter": 134, - "sunionstore": 135, - "zmscore": 136, - "zinterstore": 137, - "sunion": 138, - "bitpos": 139, - "linsert": 140, - "strlen": 141, - "sdiffstore": 142, - "hrandfield": 143, - "geopos": 144, - "bitcount": 145, - "zunionstore": 146, - "setrange": 147, - "sdiff": 148, - "georadius": 149, - "msetnx": 150, - "hexpire": 151, - "geosearch": 152, - "expiretime": 153 + "xpending": 70, + "xack": 71, + "lmove": 72, + "zcount": 73, + "mset": 74, + "xrange": 75, + "hvals": 76, + "setnx": 77, + "unlink": 78, + "brpoplpush": 79, + "select": 80, + "zincrby": 81, + "georadius_ro": 82, + "xdel": 83, + "ltrim": 84, + "decr": 85, + "zremrangebyrank": 86, + "bzpopmin": 87, + "getbit": 88, + "getdel": 89, + "lindex": 90, + "echo": 91, + "hsetnx": 92, + "getex": 93, + "pfadd": 94, + "hkeys": 95, + "xtrim": 96, + "blpop": 97, + "blmove": 98, + "zrank": 99, + "xlen": 100, + "hexpire": 101, + "persist": 102, + "discard": 103, + "sscan": 104, + "setbit": 105, + "xautoclaim": 106, + "client": 107, + "pfcount": 108, + "type": 109, + "geoadd": 110, + "restore": 111, + "xclaim": 112, + "srandmember": 113, + "zrangebylex": 114, + "decrby": 115, + "bitfield": 116, + "zscan": 117, + "fcall_ro": 118, + "lset": 119, + "smismember": 120, + "rpushx": 121, + "pubsub": 122, + "rename": 123, + "append": 124, + "bitpos": 125, + "lpos": 126, + "sunion": 127, + "zmscore": 128, + "randomkey": 129, + "sinter": 130, + "getrange": 131, + "bitcount": 132, + "dump": 133, + "zinterstore": 134, + "zpopmax": 135, + "strlen": 136, + "sinterstore": 137, + "zrandmember": 138, + "evalsha_ro": 139, + "georadius": 140, + "sunionstore": 141, + "hrandfield": 142, + "zunionstore": 143, + "linsert": 144, + "zunion": 145, + "geopos": 146, + "renamenx": 147, + "setrange": 148, + "quit": 149, + "dbsize": 150, + "msetnx": 151, + "pexpiretime": 152 } \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index d053285d..03efcbc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "redis-benchmarks-specification" -version = "0.1.314" +version = "0.1.323" description = "The Redis benchmarks specification describes the cross-language/tools requirements and expectations to foster performance and observability standards around redis related technologies. Members from both industry and academia, including organizations and individuals are encouraged to contribute." authors = ["filipecosta90 ","Redis Performance Group "] readme = "Readme.md" diff --git a/redis_benchmarks_specification/__builder__/builder.py b/redis_benchmarks_specification/__builder__/builder.py index ddd34045..e91bed69 100644 --- a/redis_benchmarks_specification/__builder__/builder.py +++ b/redis_benchmarks_specification/__builder__/builder.py @@ -73,6 +73,18 @@ def main(): parser.add_argument( "--arch", type=str, default="amd64", help="arch to build artifacts" ) + parser.add_argument( + "--builder-group", + type=str, + default=STREAM_GH_EVENTS_COMMIT_BUILDERS_CG, + help="Consumer group name to read from the stream", + ) + parser.add_argument( + "--builder-id", + type=str, + default="1", + help="Consumer id to read from the stream", + ) parser.add_argument( "--setups-folder", type=str, @@ -149,7 +161,14 @@ def main(): build_spec_image_prefetch(builders_folder, different_build_specs) - builder_consumer_group_create(conn) + builder_group = args.builder_group + builder_id = args.builder_id + if builder_group is None: + builder_group = STREAM_GH_EVENTS_COMMIT_BUILDERS_CG + if builder_id is None: + builder_id = "1" + + builder_consumer_group_create(conn, builder_group) if args.github_token is not None: logging.info("detected a github token. will update as much as possible!!! =)") previous_id = args.consumer_start_id @@ -162,28 +181,26 @@ def main(): args.docker_air_gap, arch, args.github_token, + builder_group, + builder_id, ) -def builder_consumer_group_create(conn, id="$"): +def builder_consumer_group_create( + conn, builder_group=STREAM_GH_EVENTS_COMMIT_BUILDERS_CG, id="$" +): try: conn.xgroup_create( STREAM_KEYNAME_GH_EVENTS_COMMIT, - STREAM_GH_EVENTS_COMMIT_BUILDERS_CG, + builder_group, mkstream=True, id=id, ) logging.info( - "Created consumer group named {} to distribute work.".format( - STREAM_GH_EVENTS_COMMIT_BUILDERS_CG - ) + "Created consumer group named {} to distribute work.".format(builder_group) ) except redis.exceptions.ResponseError: - logging.info( - "Consumer group named {} already existed.".format( - STREAM_GH_EVENTS_COMMIT_BUILDERS_CG - ) - ) + logging.info("Consumer group named {} already existed.".format(builder_group)) def check_benchmark_build_comment(comments): @@ -205,14 +222,22 @@ def builder_process_stream( docker_air_gap=False, arch="amd64", github_token=None, + builder_group=None, + builder_id=None, ): new_builds_count = 0 auto_approve_github_comments = True build_stream_fields_arr = [] - logging.info("Entering blocking read waiting for work.") - consumer_name = "{}-proc#{}".format(STREAM_GH_EVENTS_COMMIT_BUILDERS_CG, "1") + if builder_group is None: + builder_group = STREAM_GH_EVENTS_COMMIT_BUILDERS_CG + if builder_id is None: + builder_id = "1" + consumer_name = "{}-proc#{}".format(builder_group, builder_id) + logging.info( + f"Entering blocking read waiting for work. building for arch: {arch}. Using consumer id {consumer_name}" + ) newTestInfo = conn.xreadgroup( - STREAM_GH_EVENTS_COMMIT_BUILDERS_CG, + builder_group, consumer_name, {STREAM_KEYNAME_GH_EVENTS_COMMIT: previous_id}, count=1, @@ -230,6 +255,21 @@ def builder_process_stream( docker_client = docker.from_env() from pathlib import Path + build_request_arch = None + if b"arch" in testDetails: + build_request_arch = testDetails[b"arch"].decode() + elif b"build_arch" in testDetails: + build_request_arch = testDetails[b"build_arch"].decode() + else: + logging.info("No arch info found on the stream.") + if build_request_arch is not None and build_request_arch != arch: + logging.info( + "skipping build request given requested build arch {}!={}".format( + build_request_arch, arch + ) + ) + return previous_id, new_builds_count, build_stream_fields_arr + home = str(Path.home()) if b"git_hash" in testDetails: git_hash = testDetails[b"git_hash"] diff --git a/redis_benchmarks_specification/__cli__/args.py b/redis_benchmarks_specification/__cli__/args.py index d2ff7243..ebbf00a5 100644 --- a/redis_benchmarks_specification/__cli__/args.py +++ b/redis_benchmarks_specification/__cli__/args.py @@ -138,7 +138,7 @@ def spec_cli_args(parser): parser.add_argument("--gh_repo", type=str, default="redis") parser.add_argument("--server_name", type=str, default=None) parser.add_argument("--run_image", type=str, default="redis") - parser.add_argument("--build_arch", type=str, default="amd64") + parser.add_argument("--build_arch", type=str, default=None) parser.add_argument("--id", type=str, default="dockerhub") parser.add_argument("--mnt_point", type=str, default="") parser.add_argument("--trigger-unstable-commits", type=bool, default=True) diff --git a/redis_benchmarks_specification/__cli__/cli.py b/redis_benchmarks_specification/__cli__/cli.py index 92970372..c0331c9e 100644 --- a/redis_benchmarks_specification/__cli__/cli.py +++ b/redis_benchmarks_specification/__cli__/cli.py @@ -432,6 +432,9 @@ def trigger_tests_cli_command_logic(args, project_name, project_version): commit_dict["tests_groups_regexp"] = tests_groups_regexp commit_dict["github_org"] = args.gh_org commit_dict["github_repo"] = args.gh_repo + if args.build_arch is not None: + commit_dict["build_arch"] = args.build_arch + commit_dict["arch"] = args.build_arch if args.server_name is not None and args.server_name != "": commit_dict["server_name"] = args.server_name if args.build_artifacts != "": diff --git a/redis_benchmarks_specification/__cli__/stats.py b/redis_benchmarks_specification/__cli__/stats.py index 762373f6..97e81899 100644 --- a/redis_benchmarks_specification/__cli__/stats.py +++ b/redis_benchmarks_specification/__cli__/stats.py @@ -40,6 +40,28 @@ def clean_number(value): return 0 # Default to 0 if invalid +def clean_percentage(value): + """Parse percentage values like '17.810220866%'""" + try: + value = value.replace("%", "").strip() + return float(value) + except ValueError: + logging.error(f"Skipping invalid percentage value: {value}") + return 0.0 + + +def format_number_with_suffix(value): + """Format large numbers with B/M/K suffixes for readability""" + if value >= 1_000_000_000: + return f"{value / 1_000_000_000:.1f}B" + elif value >= 1_000_000: + return f"{value / 1_000_000:.1f}M" + elif value >= 1_000: + return f"{value / 1_000:.1f}K" + else: + return str(value) + + def get_arg_value(args, flag, default): """Extract integer values safely from CLI arguments""" if flag in args: @@ -93,6 +115,35 @@ def generate_stats_cli_command_logic(args, project_name, project_version): priority_json = json.load(fd) tracked_groups = [] tracked_groups_hist = {} + + # ACL category tracking for benchmark YAML files + benchmark_read_commands = {} + benchmark_write_commands = {} + benchmark_fast_commands = {} + benchmark_slow_commands = {} + benchmark_total_command_count = 0 + + # Group-based read/write tracking for benchmarks + benchmark_group_read = {} # group -> count + benchmark_group_write = {} # group -> count + benchmark_group_total = {} # group -> total count + + # ACL category tracking for commandstats CSV + csv_read_commands = {} + csv_write_commands = {} + csv_fast_commands = {} + csv_slow_commands = {} + csv_total_command_count = 0 + + # Group-based read/write tracking for CSV + csv_group_read = {} # group -> count + csv_group_write = {} # group -> count + csv_group_total = {} # group -> total count + + # Percentage validation tracking + csv_provided_percentages = {} # command -> provided percentage + csv_original_counts = {} # command -> original count from CSV + override_enabled = args.override_tests fail_on_required_diff = args.fail_on_required_diff overall_result = True @@ -252,6 +303,75 @@ def generate_stats_cli_command_logic(args, project_name, project_version): tracked_groups_hist[group] = 0 tracked_groups_hist[group] = tracked_groups_hist[group] + 1 + # Track ACL categories for read/write and fast/slow analysis + if "acl_categories" in command_json: + acl_categories = command_json["acl_categories"] + benchmark_total_command_count += 1 + + # Track total by group (all commands) + if group not in benchmark_group_total: + benchmark_group_total[group] = 0 + benchmark_group_total[group] += 1 + + # Track read/write commands + is_read = False + is_write = False + + if "@read" in acl_categories: + is_read = True + elif "@write" in acl_categories: + is_write = True + elif "_ro" in command.lower(): + # Commands with _ro suffix are read-only (like EVALSHA_RO) + is_read = True + elif "@pubsub" in acl_categories: + # Pubsub commands: SUBSCRIBE/UNSUBSCRIBE are read, PUBLISH is write + if command.lower() in [ + "subscribe", + "unsubscribe", + "psubscribe", + "punsubscribe", + ]: + is_read = True + else: + is_write = ( + True # PUBLISH and other pubsub commands + ) + else: + # Commands without explicit read/write ACL but not _ro are assumed write + # This covers cases like EVALSHA which can modify data + is_write = True + + if is_read: + if command not in benchmark_read_commands: + benchmark_read_commands[command] = 0 + benchmark_read_commands[command] += 1 + + # Track by group + if group not in benchmark_group_read: + benchmark_group_read[group] = 0 + benchmark_group_read[group] += 1 + + elif is_write: + if command not in benchmark_write_commands: + benchmark_write_commands[command] = 0 + benchmark_write_commands[command] += 1 + + # Track by group + if group not in benchmark_group_write: + benchmark_group_write[group] = 0 + benchmark_group_write[group] += 1 + + # Track fast/slow commands + if "@fast" in acl_categories: + if command not in benchmark_fast_commands: + benchmark_fast_commands[command] = 0 + benchmark_fast_commands[command] += 1 + elif "@slow" in acl_categories: + if command not in benchmark_slow_commands: + benchmark_slow_commands[command] = 0 + benchmark_slow_commands[command] += 1 + # Calculate total connections total_connections = clients * threads @@ -428,6 +548,15 @@ def generate_stats_cli_command_logic(args, project_name, project_version): if len(row) > 2: usecs = clean_number(row[2]) total_usecs += usecs + + # Parse percentage and original count if available + provided_percentage = None + original_count = None + if len(row) > 3: + provided_percentage = clean_percentage(row[3]) + if len(row) > 4: + original_count = clean_number(row[4]) + if count == 0: continue tracked = False @@ -452,11 +581,97 @@ def generate_stats_cli_command_logic(args, project_name, project_version): if "deprecated_since" in command_json: deprecated = True + # Track ACL categories for commandstats CSV data + if "acl_categories" in command_json: + acl_categories = command_json["acl_categories"] + + # Use original count if available, otherwise use parsed count + tracking_count = ( + original_count if original_count is not None else count + ) + csv_total_command_count += tracking_count + + # Track total by group (all commands) + if group not in csv_group_total: + csv_group_total[group] = 0 + csv_group_total[group] += tracking_count + + # Track read/write commands + is_read = False + is_write = False + + if "@read" in acl_categories: + is_read = True + elif "@write" in acl_categories: + is_write = True + elif "_ro" in cmd.lower(): + # Commands with _ro suffix are read-only (like EVALSHA_RO) + is_read = True + elif "@pubsub" in acl_categories: + # Pubsub commands: SUBSCRIBE/UNSUBSCRIBE are read, PUBLISH is write + if cmd.lower() in [ + "subscribe", + "unsubscribe", + "psubscribe", + "punsubscribe", + ]: + is_read = True + else: + is_write = True # PUBLISH and other pubsub commands + else: + # Commands without explicit read/write ACL but not _ro are assumed write + # This covers cases like EVALSHA which can modify data + is_write = True + + if is_read: + if cmd.lower() not in csv_read_commands: + csv_read_commands[cmd.lower()] = 0 + csv_read_commands[cmd.lower()] += tracking_count + + # Track by group + if group not in csv_group_read: + csv_group_read[group] = 0 + csv_group_read[group] += tracking_count + + elif is_write: + if cmd.lower() not in csv_write_commands: + csv_write_commands[cmd.lower()] = 0 + csv_write_commands[cmd.lower()] += tracking_count + + # Track by group + if group not in csv_group_write: + csv_group_write[group] = 0 + csv_group_write[group] += tracking_count + + # Track fast/slow commands + if "@fast" in acl_categories: + if cmd.lower() not in csv_fast_commands: + csv_fast_commands[cmd.lower()] = 0 + csv_fast_commands[cmd.lower()] += tracking_count + elif "@slow" in acl_categories: + if cmd.lower() not in csv_slow_commands: + csv_slow_commands[cmd.lower()] = 0 + csv_slow_commands[cmd.lower()] += tracking_count + if module is False or include_modules: - priority[cmd.lower()] = count + # Use original count if available and different from parsed count + final_count = count + if original_count is not None and original_count != count: + logging.warning( + f"Using original count for {cmd}: {original_count:,} instead of parsed {count:,}" + ) + final_count = original_count + + priority[cmd.lower()] = final_count if type(usecs) == int: priority_usecs[cmd.lower()] = usecs + # Store percentage and original count for validation + if provided_percentage is not None: + csv_provided_percentages[cmd.lower()] = provided_percentage + if original_count is not None: + csv_original_counts[cmd.lower()] = original_count + if cmdstat in tracked_commands_json: tracked = True if module is False or include_modules: @@ -651,6 +866,249 @@ def generate_stats_cli_command_logic(args, project_name, project_version): logging.info( f"There is a total of : {len(list(tracked_commands_json.keys()))} tracked commands." ) + + # ACL Category Analysis Summary + logging.info("=" * 80) + logging.info("ACL CATEGORY ANALYSIS SUMMARY") + logging.info("=" * 80) + + # Benchmark YAML files analysis + if benchmark_total_command_count > 0: + logging.info("BENCHMARK TEST SUITES ANALYSIS (from YAML files):") + logging.info("-" * 50) + + # Calculate read/write percentages for benchmarks + benchmark_read_count = sum(benchmark_read_commands.values()) + benchmark_write_count = sum(benchmark_write_commands.values()) + benchmark_rw_count = benchmark_read_count + benchmark_write_count + + if benchmark_rw_count > 0: + read_percentage = (benchmark_read_count / benchmark_rw_count) * 100 + write_percentage = (benchmark_write_count / benchmark_rw_count) * 100 + + logging.info(f"READ/WRITE COMMAND DISTRIBUTION:") + logging.info( + f" Read commands: {benchmark_read_count:6d} ({read_percentage:5.1f}%)" + ) + logging.info( + f" Write commands: {benchmark_write_count:6d} ({write_percentage:5.1f}%)" + ) + logging.info(f" Total R/W: {benchmark_rw_count:6d} (100.0%)") + else: + logging.info("No read/write commands detected in benchmark ACL categories") + + # Calculate fast/slow percentages for benchmarks + benchmark_fast_count = sum(benchmark_fast_commands.values()) + benchmark_slow_count = sum(benchmark_slow_commands.values()) + benchmark_fs_count = benchmark_fast_count + benchmark_slow_count + + if benchmark_fs_count > 0: + fast_percentage = (benchmark_fast_count / benchmark_fs_count) * 100 + slow_percentage = (benchmark_slow_count / benchmark_fs_count) * 100 + + logging.info(f"") + logging.info(f"FAST/SLOW COMMAND DISTRIBUTION:") + logging.info( + f" Fast commands: {benchmark_fast_count:6d} ({fast_percentage:5.1f}%)" + ) + logging.info( + f" Slow commands: {benchmark_slow_count:6d} ({slow_percentage:5.1f}%)" + ) + logging.info(f" Total F/S: {benchmark_fs_count:6d} (100.0%)") + else: + logging.info("No fast/slow commands detected in benchmark ACL categories") + + # Group breakdown for benchmarks + if benchmark_group_total: + logging.info("") + logging.info("READ/WRITE BREAKDOWN BY COMMAND GROUP:") + + # Calculate total calls across all groups + total_all_calls = sum(benchmark_group_total.values()) + + # Create list of groups with their total calls for sorting + group_data = [] + for group, total_group in benchmark_group_total.items(): + read_count = benchmark_group_read.get(group, 0) + write_count = benchmark_group_write.get(group, 0) + group_data.append((group, read_count, write_count, total_group)) + + # Sort by total calls (descending) + group_data.sort(key=lambda x: x[3], reverse=True) + + total_read_all = 0 + total_write_all = 0 + + for group, read_count, write_count, total_group in group_data: + group_pct = (total_group / total_all_calls) * 100 + read_pct = (read_count / total_group) * 100 if total_group > 0 else 0 + write_pct = (write_count / total_group) * 100 if total_group > 0 else 0 + + read_formatted = format_number_with_suffix(read_count) + write_formatted = format_number_with_suffix(write_count) + + logging.info( + f" {group.upper():>12} ({group_pct:4.1f}%): {read_formatted:>8} read ({read_pct:5.1f}%), {write_formatted:>8} write ({write_pct:5.1f}%)" + ) + + total_read_all += read_count + total_write_all += write_count + + # Add total row + if group_data: + total_read_pct = (total_read_all / total_all_calls) * 100 + total_write_pct = (total_write_all / total_all_calls) * 100 + total_read_formatted = format_number_with_suffix(total_read_all) + total_write_formatted = format_number_with_suffix(total_write_all) + + logging.info( + f" {'TOTAL':>12} (100.0%): {total_read_formatted:>8} read ({total_read_pct:5.1f}%), {total_write_formatted:>8} write ({total_write_pct:5.1f}%)" + ) + else: + logging.info( + "BENCHMARK TEST SUITES ANALYSIS: No commands with ACL categories found" + ) + + # CommandStats CSV analysis + if csv_total_command_count > 0: + logging.info("") + logging.info("COMMANDSTATS CSV ANALYSIS (actual Redis usage):") + logging.info("-" * 50) + + # Calculate read/write percentages for CSV data + csv_read_count = sum(csv_read_commands.values()) + csv_write_count = sum(csv_write_commands.values()) + csv_rw_count = csv_read_count + csv_write_count + + if csv_rw_count > 0: + read_percentage = (csv_read_count / csv_rw_count) * 100 + write_percentage = (csv_write_count / csv_rw_count) * 100 + + logging.info(f"READ/WRITE COMMAND DISTRIBUTION:") + logging.info( + f" Read commands: {csv_read_count:8d} ({read_percentage:5.1f}%)" + ) + logging.info( + f" Write commands: {csv_write_count:8d} ({write_percentage:5.1f}%)" + ) + logging.info(f" Total R/W: {csv_rw_count:8d} (100.0%)") + else: + logging.info("No read/write commands detected in CSV ACL categories") + + # Calculate fast/slow percentages for CSV data + csv_fast_count = sum(csv_fast_commands.values()) + csv_slow_count = sum(csv_slow_commands.values()) + csv_fs_count = csv_fast_count + csv_slow_count + + if csv_fs_count > 0: + fast_percentage = (csv_fast_count / csv_fs_count) * 100 + slow_percentage = (csv_slow_count / csv_fs_count) * 100 + + logging.info(f"") + logging.info(f"FAST/SLOW COMMAND DISTRIBUTION:") + logging.info( + f" Fast commands: {csv_fast_count:8d} ({fast_percentage:5.1f}%)" + ) + logging.info( + f" Slow commands: {csv_slow_count:8d} ({slow_percentage:5.1f}%)" + ) + logging.info(f" Total F/S: {csv_fs_count:8d} (100.0%)") + else: + logging.info("No fast/slow commands detected in CSV ACL categories") + + # Group breakdown for CSV data + if csv_group_total: + logging.info("") + logging.info("READ/WRITE BREAKDOWN BY COMMAND GROUP:") + + # Calculate total calls across all groups + total_all_calls = sum(csv_group_total.values()) + + # Create list of groups with their total calls for sorting + group_data = [] + for group, total_group in csv_group_total.items(): + read_count = csv_group_read.get(group, 0) + write_count = csv_group_write.get(group, 0) + group_data.append((group, read_count, write_count, total_group)) + + # Sort by total calls (descending) + group_data.sort(key=lambda x: x[3], reverse=True) + + total_read_all = 0 + total_write_all = 0 + + for group, read_count, write_count, total_group in group_data: + group_pct = (total_group / total_all_calls) * 100 + read_pct = (read_count / total_group) * 100 if total_group > 0 else 0 + write_pct = (write_count / total_group) * 100 if total_group > 0 else 0 + + read_formatted = format_number_with_suffix(read_count) + write_formatted = format_number_with_suffix(write_count) + + logging.info( + f" {group.upper():>12} ({group_pct:4.1f}%): {read_formatted:>8} read ({read_pct:5.1f}%), {write_formatted:>8} write ({write_pct:5.1f}%)" + ) + + total_read_all += read_count + total_write_all += write_count + + # Add total row + if group_data: + total_read_pct = (total_read_all / total_all_calls) * 100 + total_write_pct = (total_write_all / total_all_calls) * 100 + total_read_formatted = format_number_with_suffix(total_read_all) + total_write_formatted = format_number_with_suffix(total_write_all) + + logging.info( + f" {'TOTAL':>12} (100.0%): {total_read_formatted:>8} read ({total_read_pct:5.1f}%), {total_write_formatted:>8} write ({total_write_pct:5.1f}%)" + ) + + # Validate parsing accuracy by comparing with provided percentages + if csv_provided_percentages and csv_original_counts: + logging.info("") + logging.info("PARSING VALIDATION:") + logging.info("-" * 30) + + # Calculate total from original counts + total_original = sum(csv_original_counts.values()) + total_provided_percentage = sum(csv_provided_percentages.values()) + + logging.info(f"Total original count: {total_original:,}") + logging.info( + f"Sum of provided percentages: {total_provided_percentage:.6f}%" + ) + + # Check if our billion parsing matches original counts + parsing_errors = 0 + for cmd in csv_original_counts: + if cmd in priority: # priority contains our parsed values + parsed_value = priority[cmd] + original_value = csv_original_counts[cmd] + if parsed_value != original_value: + parsing_errors += 1 + logging.warning( + f"Parsing mismatch for {cmd}: parsed={parsed_value:,} vs original={original_value:,}" + ) + + if parsing_errors == 0: + logging.info("✓ All billion/million/thousand parsing is accurate") + else: + logging.warning(f"✗ Found {parsing_errors} parsing errors") + + # Validate percentage calculation + if abs(total_provided_percentage - 100.0) < 0.001: + logging.info("✓ Provided percentages sum to 100%") + else: + logging.warning( + f"✗ Provided percentages sum to {total_provided_percentage:.6f}% (not 100%)" + ) + else: + logging.info("") + logging.info( + "COMMANDSTATS CSV ANALYSIS: No CSV file provided or no commands found" + ) + + logging.info("=" * 80) # Save pipeline count to CSV csv_filename = "memtier_pipeline_count.csv" with open(csv_filename, "w", newline="") as csvfile: @@ -714,3 +1172,133 @@ def generate_stats_cli_command_logic(args, project_name, project_version): ) logging.info(f"Sorted command groups count data saved to {csv_filename}") + + # Save ACL category data to CSV files + + # Benchmark data CSV files + csv_filename = "benchmark_acl_read_write_commands.csv" + with open(csv_filename, "w", newline="") as csvfile: + fieldnames = ["command", "type", "count"] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + for command, count in sorted(benchmark_read_commands.items()): + writer.writerow({"command": command, "type": "read", "count": count}) + for command, count in sorted(benchmark_write_commands.items()): + writer.writerow({"command": command, "type": "write", "count": count}) + + logging.info(f"Benchmark ACL read/write commands data saved to {csv_filename}") + + csv_filename = "benchmark_acl_fast_slow_commands.csv" + with open(csv_filename, "w", newline="") as csvfile: + fieldnames = ["command", "type", "count"] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + for command, count in sorted(benchmark_fast_commands.items()): + writer.writerow({"command": command, "type": "fast", "count": count}) + for command, count in sorted(benchmark_slow_commands.items()): + writer.writerow({"command": command, "type": "slow", "count": count}) + + logging.info(f"Benchmark ACL fast/slow commands data saved to {csv_filename}") + + # CommandStats CSV data files + csv_filename = "commandstats_acl_read_write_commands.csv" + with open(csv_filename, "w", newline="") as csvfile: + fieldnames = ["command", "type", "count"] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + for command, count in sorted(csv_read_commands.items()): + writer.writerow({"command": command, "type": "read", "count": count}) + for command, count in sorted(csv_write_commands.items()): + writer.writerow({"command": command, "type": "write", "count": count}) + + logging.info(f"CommandStats ACL read/write commands data saved to {csv_filename}") + + csv_filename = "commandstats_acl_fast_slow_commands.csv" + with open(csv_filename, "w", newline="") as csvfile: + fieldnames = ["command", "type", "count"] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + for command, count in sorted(csv_fast_commands.items()): + writer.writerow({"command": command, "type": "fast", "count": count}) + for command, count in sorted(csv_slow_commands.items()): + writer.writerow({"command": command, "type": "slow", "count": count}) + + logging.info(f"CommandStats ACL fast/slow commands data saved to {csv_filename}") + + # Save group breakdown data to CSV files + + # Benchmark group breakdown + csv_filename = "benchmark_group_read_write_breakdown.csv" + with open(csv_filename, "w", newline="") as csvfile: + fieldnames = [ + "group", + "read_count", + "write_count", + "total_count", + "read_percentage", + "write_percentage", + ] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + all_groups = set(benchmark_group_read.keys()) | set( + benchmark_group_write.keys() + ) + for group in sorted(all_groups): + read_count = benchmark_group_read.get(group, 0) + write_count = benchmark_group_write.get(group, 0) + total_count = read_count + write_count + read_pct = (read_count / total_count * 100) if total_count > 0 else 0 + write_pct = (write_count / total_count * 100) if total_count > 0 else 0 + + writer.writerow( + { + "group": group, + "read_count": read_count, + "write_count": write_count, + "total_count": total_count, + "read_percentage": round(read_pct, 2), + "write_percentage": round(write_pct, 2), + } + ) + + logging.info(f"Benchmark group read/write breakdown saved to {csv_filename}") + + # CommandStats group breakdown + csv_filename = "commandstats_group_read_write_breakdown.csv" + with open(csv_filename, "w", newline="") as csvfile: + fieldnames = [ + "group", + "read_count", + "write_count", + "total_count", + "read_percentage", + "write_percentage", + ] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + all_groups = set(csv_group_read.keys()) | set(csv_group_write.keys()) + for group in sorted(all_groups): + read_count = csv_group_read.get(group, 0) + write_count = csv_group_write.get(group, 0) + total_count = read_count + write_count + read_pct = (read_count / total_count * 100) if total_count > 0 else 0 + write_pct = (write_count / total_count * 100) if total_count > 0 else 0 + + writer.writerow( + { + "group": group, + "read_count": read_count, + "write_count": write_count, + "total_count": total_count, + "read_percentage": round(read_pct, 2), + "write_percentage": round(write_pct, 2), + } + ) + + logging.info(f"CommandStats group read/write breakdown saved to {csv_filename}") diff --git a/redis_benchmarks_specification/__self_contained_coordinator__/docker.py b/redis_benchmarks_specification/__self_contained_coordinator__/docker.py index f1f4a12a..69777522 100644 --- a/redis_benchmarks_specification/__self_contained_coordinator__/docker.py +++ b/redis_benchmarks_specification/__self_contained_coordinator__/docker.py @@ -15,7 +15,7 @@ def generate_standalone_redis_server_args( redis_arguments="", password=None, ): - added_params = ["port", "protected-mode", "dir", "requirepass"] + added_params = ["port", "protected-mode", "dir", "requirepass", "logfile"] # start redis-server command = [ binary, @@ -31,6 +31,7 @@ def generate_standalone_redis_server_args( logging.info("Redis server will be started with password authentication") if dbdir != "": command.extend(["--dir", dbdir]) + command.extend(["--logfile", f"{dbdir}redis.log"]) if configuration_parameters is not None: for parameter, parameter_value in configuration_parameters.items(): if parameter not in added_params: diff --git a/redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py b/redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py index 1488cb3e..10958ab7 100644 --- a/redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py +++ b/redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py @@ -107,6 +107,43 @@ from redis_benchmarks_specification.__self_contained_coordinator__.build_info import ( extract_build_info_from_streamdata, ) + + +def print_directory_logs(directory_path, description=""): + """Print all .log files in a directory for debugging purposes.""" + if not os.path.exists(directory_path): + logging.warning(f"Directory {directory_path} does not exist") + return + + logging.info( + f"Printing all .log files in {description} directory: {directory_path}" + ) + try: + for root, dirs, files in os.walk(directory_path): + for file in files: + # Only process .log files + if not file.endswith(".log"): + continue + + file_path = os.path.join(root, file) + logging.info(f"Found log file: {file_path}") + try: + # Try to read and print the log file content + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: + content = f.read() + if content.strip(): # Only print non-empty files + logging.info(f"Content of {file_path}:") + logging.info("-" * 40) + logging.info(content) + logging.info("-" * 40) + else: + logging.info(f"Log file {file_path} is empty") + except Exception as e: + logging.warning(f"Could not read log file {file_path}: {e}") + except Exception as e: + logging.error(f"Error walking directory {directory_path}: {e}") + + from redis_benchmarks_specification.__self_contained_coordinator__.cpuset import ( extract_db_cpu_limit, generate_cpuset_cpus, @@ -1319,6 +1356,18 @@ def process_self_contained_coordinator_stream( print("-" * 60) + # Print all log files in the temporary directories for debugging + logging.critical( + "Printing all files in temporary directories for debugging..." + ) + try: + print_directory_logs(temporary_dir, "Redis server") + print_directory_logs(temporary_dir_client, "Client") + except Exception as log_error: + logging.error( + f"Failed to print directory logs: {log_error}" + ) + test_result = False # tear-down logging.info("Tearing down setup") @@ -1349,14 +1398,27 @@ def process_self_contained_coordinator_stream( ) ) pass - logging.info( - "Removing temporary dirs {} and {}".format( - temporary_dir, temporary_dir_client - ) - ) - shutil.rmtree(temporary_dir, ignore_errors=True) - shutil.rmtree(temporary_dir_client, ignore_errors=True) + # Only remove temporary directories if test passed + if test_result: + logging.info( + "Test passed. Removing temporary dirs {} and {}".format( + temporary_dir, temporary_dir_client + ) + ) + shutil.rmtree(temporary_dir, ignore_errors=True) + shutil.rmtree( + temporary_dir_client, ignore_errors=True + ) + else: + logging.warning( + "Test failed. Preserving temporary dirs for debugging: {} and {}".format( + temporary_dir, temporary_dir_client + ) + ) + # Print all log files in the temporary directories for debugging + print_directory_logs(temporary_dir, "Redis server") + print_directory_logs(temporary_dir_client, "Client") overall_result &= test_result