Merge pull request redis#214 from redis/improve.geo.benchmarks

slice4e · web-flow · commit c0ea3b25ece1 · 2023-02-16T09:34:46.000-07:00
Updated geo benchmarks to fully saturate one primary. Included zset benchmarks with long scores.
diff --git a/commands-priority.json b/commands-priority.json
diff --git a/redis_benchmarks_specification/__cli__/args.py b/redis_benchmarks_specification/__cli__/args.py
@@ -46,6 +46,7 @@ def spec_cli_args(parser):
         help="Include modules statistics on commandstats.",
     )
     parser.add_argument("--summary-csv", type=str, default="")
+    parser.add_argument("--group-csv", type=str, default="")
     parser.add_argument("--commands-json-file", type=str, default="./commands.json")
     parser.add_argument(
         "--commands-priority-file", type=str, default="./commands-priority.json"
diff --git a/redis_benchmarks_specification/__cli__/stats.py b/redis_benchmarks_specification/__cli__/stats.py
@@ -4,6 +4,7 @@
 
 import redis
 import oyaml as yaml
+import csv
 
 from redis_benchmarks_specification.__common__.runner import get_benchmark_specs
 
@@ -229,6 +230,24 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
                 yaml.dump(benchmark_config, file, sort_keys=False, width=100000)
     total_tracked_commands_pct = "n/a"
 
+    module_names = {
+        "ft": "redisearch",
+        "search": "redisearch",
+        "_ft": "redisearch",
+        "graph": "redisgraph",
+        "ts": "redistimeseries",
+        "timeseries": "redistimeseries",
+        "json": "redisjson",
+        "bf": "redisbloom",
+        "cf": "redisbloom",
+        "topk": "redisbloom",
+        "cms": "redisbloom",
+        "tdigest": "redisbloom",
+    }
+
+    group_usage_calls = {}
+    group_usage_usecs = {}
+
     if args.commandstats_csv != "":
         logging.info(
             "Reading commandstats csv {} to determine commands/test coverage".format(
@@ -239,6 +258,7 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
 
         rows = []
         priority = {}
+        priority_usecs = {}
 
         # open file in read mode
         total_count = 0
@@ -251,12 +271,15 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
             csv_reader = reader(x.replace("\0", "") for x in read_obj)
             # Iterate over each row in the csv using reader object
             for row in csv_reader:
-                if len(row) == 0:
+                if len(row) <= 2:
                     continue
                 if "cmdstat_" not in row[0]:
                     continue
                 # row variable is a list that represents a row in csv
                 cmdstat = row[0]
+                cmdstat = cmdstat.lower()
+                if "cmdstat_" not in cmdstat:
+                    continue
                 cmdstat = cmdstat.replace("cmdstat_", "")
                 count = int(row[1])
                 usecs = None
@@ -272,6 +295,15 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
                 deprecated = False
                 if "." in cmdstat:
                     module = True
+                    cmd_module_prefix = cmdstat.split(".")[0]
+                    if cmd_module_prefix in module_names:
+                        group = module_names[cmd_module_prefix]
+                    else:
+                        logging.error(
+                            "command with a module prefix does not have module name {}".format(
+                                cmd_module_prefix
+                            )
+                        )
                 if cmd in commands_json:
                     command_json = commands_json[cmd]
                     group = command_json["group"]
@@ -280,14 +312,35 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
 
                 if module is False or include_modules:
                     priority[cmd.lower()] = count
+                    if type(usecs) == int:
+                        priority_usecs[cmd.lower()] = usecs
 
                 if cmdstat in tracked_commands_json:
                     tracked = True
                 if module is False or include_modules:
                     row = [cmdstat, group, count, usecs, tracked, deprecated]
                     rows.append(row)
+                if group not in group_usage_calls:
+                    group_usage_calls[group] = {}
+                    group_usage_calls[group]["call"] = 0
+                if group not in group_usage_usecs:
+                    group_usage_usecs[group] = {}
+                    group_usage_usecs[group]["usecs"] = 0
+                if type(count) == int:
+                    group_usage_calls[group]["call"] = (
+                        group_usage_calls[group]["call"] + count
+                    )
+                if type(usecs) == int:
+                    group_usage_usecs[group]["usecs"] = (
+                        group_usage_usecs[group]["usecs"] + usecs
+                    )
+                if group == "n/a":
+                    logging.warn("Unable to detect group in {}".format(cmd))
 
         priority_list = sorted(((priority[cmd], cmd) for cmd in priority), reverse=True)
+        priority_list_usecs = sorted(
+            ((priority_usecs[cmd], cmd) for cmd in priority_usecs), reverse=True
+        )
 
         priority_json = {}
         top_10_missing = []
@@ -298,6 +351,16 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
             count = x[0]
             total_count += count
 
+        for group_name, group in group_usage_calls.items():
+            call = group["call"]
+            pct = call / total_count
+            group["pct"] = pct
+
+        for group_name, group in group_usage_usecs.items():
+            usecs = group["usecs"]
+            pct = usecs / total_usecs
+            group["pct"] = pct
+
         for pos, x in enumerate(priority_list, 1):
             count = x[0]
             cmd = x[1]
@@ -322,6 +385,31 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
                 )
                 json.dump(priority_json, fd, indent=True)
 
+        if args.group_csv != "":
+            header = [
+                "group",
+                "count",
+                "usecs",
+                "usec_per_call",
+                "% count",
+                "% usecs",
+            ]
+            with open(args.group_csv, "w", encoding="UTF8", newline="") as f:
+                writer = csv.writer(f)
+
+                # write the header
+                writer.writerow(header)
+                for group_name, group_usage_info in group_usage_calls.items():
+                    count = group_usage_info["call"]
+                    call_pct = group_usage_info["pct"]
+                    usecs = group_usage_usecs[group_name]["usecs"]
+                    usecs_pct = group_usage_usecs[group_name]["pct"]
+                    usecs_per_call = usecs / count
+
+                    writer.writerow(
+                        [group_name, count, usecs, usecs_per_call, call_pct, usecs_pct]
+                    )
+
         if args.summary_csv != "":
             header = [
                 "command",
@@ -335,7 +423,6 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
                 "% usecs",
                 "diff count usecs",
             ]
-            import csv
 
             with open(args.summary_csv, "w", encoding="UTF8", newline="") as f:
                 writer = csv.writer(f)
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geodist-pipeline-10.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geodist-pipeline-10.yml
@@ -9,7 +9,7 @@ dbconfig:
   dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
   resources:
     requests:
-      memory: 1g
+      memory: 6g
 tested-groups:
 - geo
 tested-commands:
@@ -21,7 +21,7 @@ build-variants:
 clientconfig:
   run_image: redislabs/memtier_benchmark:edge
   tool: memtier_benchmark
-  arguments: --pipeline 10 -c 2 -t 2 --command="GEODIST key 1 2"  --hide-histogram --test-time 180
+  arguments: --pipeline 10 -c 50 -t 4 --command="GEODIST key 1 2"  --hide-histogram --test-time 180
   resources:
     requests:
       cpus: '4'
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geodist.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geodist.yml
@@ -9,7 +9,7 @@ dbconfig:
   dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
   resources:
     requests:
-      memory: 1g
+      memory: 6g
 tested-groups:
 - geo
 tested-commands:
@@ -21,7 +21,7 @@ build-variants:
 clientconfig:
   run_image: redislabs/memtier_benchmark:edge
   tool: memtier_benchmark
-  arguments: -c 2 -t 2 --command="GEODIST key 1 2"  --hide-histogram --test-time 180
+  arguments: -c 50 -t 4 --command="GEODIST key 1 2"  --hide-histogram --test-time 180
   resources:
     requests:
       cpus: '4'
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geohash-pipeline-10.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geohash-pipeline-10.yml
@@ -9,7 +9,7 @@ dbconfig:
   dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
   resources:
     requests:
-      memory: 1g
+      memory: 6g
 tested-groups:
 - geo
 tested-commands:
@@ -21,7 +21,7 @@ build-variants:
 clientconfig:
   run_image: redislabs/memtier_benchmark:edge
   tool: memtier_benchmark
-  arguments: --pipeline 10 -c 2 -t 2 --command="GEOHASH key 1"  --hide-histogram --test-time 180
+  arguments: --pipeline 10 -c 50 -t 4 --command="GEOHASH key 1"  --hide-histogram --test-time 180
   resources:
     requests:
       cpus: '4'
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geohash.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geohash.yml
@@ -9,7 +9,7 @@ dbconfig:
   dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
   resources:
     requests:
-      memory: 1g
+      memory: 6g
 tested-groups:
 - geo
 tested-commands:
@@ -21,7 +21,7 @@ build-variants:
 clientconfig:
   run_image: redislabs/memtier_benchmark:edge
   tool: memtier_benchmark
-  arguments: -c 2 -t 2 --command="GEOHASH key 1"  --hide-histogram --test-time 180
+  arguments: -c 50 -t 4 --command="GEOHASH key 1"  --hide-histogram --test-time 180
   resources:
     requests:
       cpus: '4'
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geopos-pipeline-10.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geopos-pipeline-10.yml
@@ -9,7 +9,7 @@ dbconfig:
   dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
   resources:
     requests:
-      memory: 1g
+      memory: 6g
 tested-groups:
 - geo
 tested-commands:
@@ -21,7 +21,7 @@ build-variants:
 clientconfig:
   run_image: redislabs/memtier_benchmark:edge
   tool: memtier_benchmark
-  arguments: --pipeline 10 -c 2 -t 2 --command="GEOPOS key 1"  --hide-histogram --test-time 180
+  arguments: --pipeline 10 -c 50 -t 4 --command="GEOPOS key 1"  --hide-histogram --test-time 180
   resources:
     requests:
       cpus: '4'
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geopos.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geopos.yml
@@ -9,7 +9,7 @@ dbconfig:
   dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
   resources:
     requests:
-      memory: 1g
+      memory: 6g
 tested-groups:
 - geo
 tested-commands:
@@ -21,7 +21,7 @@ build-variants:
 clientconfig:
   run_image: redislabs/memtier_benchmark:edge
   tool: memtier_benchmark
-  arguments: -c 2 -t 2 --command="GEOPOS key 1"  --hide-histogram --test-time 180
+  arguments: -c 50 -t 4 --command="GEOPOS key 1"  --hide-histogram --test-time 180
   resources:
     requests:
       cpus: '4'
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geosearch-fromlonlat-bybox.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geosearch-fromlonlat-bybox.yml
@@ -9,7 +9,7 @@ dbconfig:
   dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
   resources:
     requests:
-      memory: 1g
+      memory: 6g
 tested-groups:
 - geo
 tested-commands:
@@ -21,7 +21,7 @@ build-variants:
 clientconfig:
   run_image: redislabs/memtier_benchmark:edge
   tool: memtier_benchmark
-  arguments: -c 2 -t 2 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYBOX 200 200 KM"  --hide-histogram --test-time 180
+  arguments: -c 2 -t 4 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYBOX 200 200 KM"  --hide-histogram --test-time 180
   resources:
     requests:
       cpus: '4'
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geosearch-fromlonlat-pipeline-10.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geosearch-fromlonlat-pipeline-10.yml
@@ -9,7 +9,7 @@ dbconfig:
   dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
   resources:
     requests:
-      memory: 1g
+      memory: 6g
 tested-groups:
 - geo
 tested-commands:
@@ -21,7 +21,7 @@ build-variants:
 clientconfig:
   run_image: redislabs/memtier_benchmark:edge
   tool: memtier_benchmark
-  arguments: --pipeline 10 -c 2 -t 2 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYRADIUS 200 KM"  --hide-histogram --test-time 180
+  arguments: --pipeline 10 -c 2 -t 4 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYRADIUS 200 KM"  --hide-histogram --test-time 180
   resources:
     requests:
       cpus: '4'
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geosearch-fromlonlat.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geosearch-fromlonlat.yml
@@ -9,7 +9,7 @@ dbconfig:
   dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
   resources:
     requests:
-      memory: 1g
+      memory: 6g
 tested-groups:
 - geo
 tested-commands:
@@ -21,7 +21,7 @@ build-variants:
 clientconfig:
   run_image: redislabs/memtier_benchmark:edge
   tool: memtier_benchmark
-  arguments: -c 2 -t 2 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYRADIUS 200 KM"  --hide-histogram --test-time 180
+  arguments: -c 2 -t 4 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYRADIUS 200 KM"  --hide-histogram --test-time 180
   resources:
     requests:
       cpus: '4'
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-zset-10-elements-zrange-all-elements-long-scores.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-zset-10-elements-zrange-all-elements-long-scores.yml
@@ -0,0 +1,31 @@
+version: 0.4
+name: memtier_benchmark-1key-zset-10-elements-zrange-all-elements-long-scores
+description: 'Runs memtier_benchmark, for a keyspace length of 1 SORTED SET key. The SORTED SET contains 10 elements in it and we query it using ZRANGE BYSCORE with a range of all elements. The scores are long numbers.'
+dbconfig:
+  configuration-parameters:
+    save: '""'
+  check:
+    keyspacelen: 1
+  resources:
+    requests:
+      memory: 1g
+  init_commands:
+  - '"ZADD" "zset:10:long_score" "10000000" "lysbgqqfqw" "10000001" "mtccjerdon" "10000002" "jekkafodvk" "10000003" "nmgxcctxpn" "10000004" "vyqqkuszzh" "10000005" "pytrnqdhvs" "10000006" "oguwnmniig" "10000007" "gekntrykfh" "10000008" "nhfnbxqgol" "10000009" "cgoeihlnei"'
+tested-groups:
+- sorted-set
+tested-commands:
+- zrange
+redis-topologies:
+- oss-standalone
+build-variants:
+- gcc:8.5.0-amd64-debian-buster-default
+clientconfig:
+  run_image: redislabs/memtier_benchmark:edge
+  tool: memtier_benchmark
+  arguments: --command="ZRANGE zset:10:long_score 0 1000000000 BYSCORE WITHSCORES"  --hide-histogram --test-time 180
+  resources:
+    requests:
+      cpus: '4'
+      memory: 2g
+
+priority: 53
diff --git a/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-zset-100-elements-zrangebyscore-all-elements-long-scores.yml b/redis_benchmarks_specification/test-suites/memtier_benchmark-1key-zset-100-elements-zrangebyscore-all-elements-long-scores.yml

Original file line number	Diff line number	Diff line change
`@@ -46,6 +46,7 @@ def spec_cli_args(parser):`
`46`	`46`	`help="Include modules statistics on commandstats.",`
`47`	`47`	`)`
`48`	`48`	`parser.add_argument("--summary-csv", type=str, default="")`
	`49`	`+ parser.add_argument("--group-csv", type=str, default="")`
`49`	`50`	`parser.add_argument("--commands-json-file", type=str, default="./commands.json")`
`50`	`51`	`parser.add_argument(`
`51`	`52`	`"--commands-priority-file", type=str, default="./commands-priority.json"`