Skip to content

Commit c0ea3b2

Browse files
authored
Merge pull request redis#214 from redis/improve.geo.benchmarks
Updated geo benchmarks to fully saturate one primary. Include zset benchmarks with long score
2 parents f81d695 + 908379e commit c0ea3b2

14 files changed

+450
-186
lines changed

commands-priority.json

Lines changed: 280 additions & 166 deletions
Large diffs are not rendered by default.

redis_benchmarks_specification/__cli__/args.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ def spec_cli_args(parser):
4646
help="Include modules statistics on commandstats.",
4747
)
4848
parser.add_argument("--summary-csv", type=str, default="")
49+
parser.add_argument("--group-csv", type=str, default="")
4950
parser.add_argument("--commands-json-file", type=str, default="./commands.json")
5051
parser.add_argument(
5152
"--commands-priority-file", type=str, default="./commands-priority.json"

redis_benchmarks_specification/__cli__/stats.py

Lines changed: 89 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import redis
66
import oyaml as yaml
7+
import csv
78

89
from redis_benchmarks_specification.__common__.runner import get_benchmark_specs
910

@@ -229,6 +230,24 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
229230
yaml.dump(benchmark_config, file, sort_keys=False, width=100000)
230231
total_tracked_commands_pct = "n/a"
231232

233+
module_names = {
234+
"ft": "redisearch",
235+
"search": "redisearch",
236+
"_ft": "redisearch",
237+
"graph": "redisgraph",
238+
"ts": "redistimeseries",
239+
"timeseries": "redistimeseries",
240+
"json": "redisjson",
241+
"bf": "redisbloom",
242+
"cf": "redisbloom",
243+
"topk": "redisbloom",
244+
"cms": "redisbloom",
245+
"tdigest": "redisbloom",
246+
}
247+
248+
group_usage_calls = {}
249+
group_usage_usecs = {}
250+
232251
if args.commandstats_csv != "":
233252
logging.info(
234253
"Reading commandstats csv {} to determine commands/test coverage".format(
@@ -239,6 +258,7 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
239258

240259
rows = []
241260
priority = {}
261+
priority_usecs = {}
242262

243263
# open file in read mode
244264
total_count = 0
@@ -251,12 +271,15 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
251271
csv_reader = reader(x.replace("\0", "") for x in read_obj)
252272
# Iterate over each row in the csv using reader object
253273
for row in csv_reader:
254-
if len(row) == 0:
274+
if len(row) <= 2:
255275
continue
256276
if "cmdstat_" not in row[0]:
257277
continue
258278
# row variable is a list that represents a row in csv
259279
cmdstat = row[0]
280+
cmdstat = cmdstat.lower()
281+
if "cmdstat_" not in cmdstat:
282+
continue
260283
cmdstat = cmdstat.replace("cmdstat_", "")
261284
count = int(row[1])
262285
usecs = None
@@ -272,6 +295,15 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
272295
deprecated = False
273296
if "." in cmdstat:
274297
module = True
298+
cmd_module_prefix = cmdstat.split(".")[0]
299+
if cmd_module_prefix in module_names:
300+
group = module_names[cmd_module_prefix]
301+
else:
302+
logging.error(
303+
"command with a module prefix does not have module name {}".format(
304+
cmd_module_prefix
305+
)
306+
)
275307
if cmd in commands_json:
276308
command_json = commands_json[cmd]
277309
group = command_json["group"]
@@ -280,14 +312,35 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
280312

281313
if module is False or include_modules:
282314
priority[cmd.lower()] = count
315+
if type(usecs) == int:
316+
priority_usecs[cmd.lower()] = usecs
283317

284318
if cmdstat in tracked_commands_json:
285319
tracked = True
286320
if module is False or include_modules:
287321
row = [cmdstat, group, count, usecs, tracked, deprecated]
288322
rows.append(row)
323+
if group not in group_usage_calls:
324+
group_usage_calls[group] = {}
325+
group_usage_calls[group]["call"] = 0
326+
if group not in group_usage_usecs:
327+
group_usage_usecs[group] = {}
328+
group_usage_usecs[group]["usecs"] = 0
329+
if type(count) == int:
330+
group_usage_calls[group]["call"] = (
331+
group_usage_calls[group]["call"] + count
332+
)
333+
if type(usecs) == int:
334+
group_usage_usecs[group]["usecs"] = (
335+
group_usage_usecs[group]["usecs"] + usecs
336+
)
337+
if group == "n/a":
338+
logging.warn("Unable to detect group in {}".format(cmd))
289339

290340
priority_list = sorted(((priority[cmd], cmd) for cmd in priority), reverse=True)
341+
priority_list_usecs = sorted(
342+
((priority_usecs[cmd], cmd) for cmd in priority_usecs), reverse=True
343+
)
291344

292345
priority_json = {}
293346
top_10_missing = []
@@ -298,6 +351,16 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
298351
count = x[0]
299352
total_count += count
300353

354+
for group_name, group in group_usage_calls.items():
355+
call = group["call"]
356+
pct = call / total_count
357+
group["pct"] = pct
358+
359+
for group_name, group in group_usage_usecs.items():
360+
usecs = group["usecs"]
361+
pct = usecs / total_usecs
362+
group["pct"] = pct
363+
301364
for pos, x in enumerate(priority_list, 1):
302365
count = x[0]
303366
cmd = x[1]
@@ -322,6 +385,31 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
322385
)
323386
json.dump(priority_json, fd, indent=True)
324387

388+
if args.group_csv != "":
389+
header = [
390+
"group",
391+
"count",
392+
"usecs",
393+
"usec_per_call",
394+
"% count",
395+
"% usecs",
396+
]
397+
with open(args.group_csv, "w", encoding="UTF8", newline="") as f:
398+
writer = csv.writer(f)
399+
400+
# write the header
401+
writer.writerow(header)
402+
for group_name, group_usage_info in group_usage_calls.items():
403+
count = group_usage_info["call"]
404+
call_pct = group_usage_info["pct"]
405+
usecs = group_usage_usecs[group_name]["usecs"]
406+
usecs_pct = group_usage_usecs[group_name]["pct"]
407+
usecs_per_call = usecs / count
408+
409+
writer.writerow(
410+
[group_name, count, usecs, usecs_per_call, call_pct, usecs_pct]
411+
)
412+
325413
if args.summary_csv != "":
326414
header = [
327415
"command",
@@ -335,7 +423,6 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
335423
"% usecs",
336424
"diff count usecs",
337425
]
338-
import csv
339426

340427
with open(args.summary_csv, "w", encoding="UTF8", newline="") as f:
341428
writer = csv.writer(f)

redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geodist-pipeline-10.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ dbconfig:
99
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
1010
resources:
1111
requests:
12-
memory: 1g
12+
memory: 6g
1313
tested-groups:
1414
- geo
1515
tested-commands:
@@ -21,7 +21,7 @@ build-variants:
2121
clientconfig:
2222
run_image: redislabs/memtier_benchmark:edge
2323
tool: memtier_benchmark
24-
arguments: --pipeline 10 -c 2 -t 2 --command="GEODIST key 1 2" --hide-histogram --test-time 180
24+
arguments: --pipeline 10 -c 50 -t 4 --command="GEODIST key 1 2" --hide-histogram --test-time 180
2525
resources:
2626
requests:
2727
cpus: '4'

redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geodist.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ dbconfig:
99
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
1010
resources:
1111
requests:
12-
memory: 1g
12+
memory: 6g
1313
tested-groups:
1414
- geo
1515
tested-commands:
@@ -21,7 +21,7 @@ build-variants:
2121
clientconfig:
2222
run_image: redislabs/memtier_benchmark:edge
2323
tool: memtier_benchmark
24-
arguments: -c 2 -t 2 --command="GEODIST key 1 2" --hide-histogram --test-time 180
24+
arguments: -c 50 -t 4 --command="GEODIST key 1 2" --hide-histogram --test-time 180
2525
resources:
2626
requests:
2727
cpus: '4'

redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geohash-pipeline-10.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ dbconfig:
99
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
1010
resources:
1111
requests:
12-
memory: 1g
12+
memory: 6g
1313
tested-groups:
1414
- geo
1515
tested-commands:
@@ -21,7 +21,7 @@ build-variants:
2121
clientconfig:
2222
run_image: redislabs/memtier_benchmark:edge
2323
tool: memtier_benchmark
24-
arguments: --pipeline 10 -c 2 -t 2 --command="GEOHASH key 1" --hide-histogram --test-time 180
24+
arguments: --pipeline 10 -c 50 -t 4 --command="GEOHASH key 1" --hide-histogram --test-time 180
2525
resources:
2626
requests:
2727
cpus: '4'

redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geohash.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ dbconfig:
99
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
1010
resources:
1111
requests:
12-
memory: 1g
12+
memory: 6g
1313
tested-groups:
1414
- geo
1515
tested-commands:
@@ -21,7 +21,7 @@ build-variants:
2121
clientconfig:
2222
run_image: redislabs/memtier_benchmark:edge
2323
tool: memtier_benchmark
24-
arguments: -c 2 -t 2 --command="GEOHASH key 1" --hide-histogram --test-time 180
24+
arguments: -c 50 -t 4 --command="GEOHASH key 1" --hide-histogram --test-time 180
2525
resources:
2626
requests:
2727
cpus: '4'

redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geopos-pipeline-10.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ dbconfig:
99
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
1010
resources:
1111
requests:
12-
memory: 1g
12+
memory: 6g
1313
tested-groups:
1414
- geo
1515
tested-commands:
@@ -21,7 +21,7 @@ build-variants:
2121
clientconfig:
2222
run_image: redislabs/memtier_benchmark:edge
2323
tool: memtier_benchmark
24-
arguments: --pipeline 10 -c 2 -t 2 --command="GEOPOS key 1" --hide-histogram --test-time 180
24+
arguments: --pipeline 10 -c 50 -t 4 --command="GEOPOS key 1" --hide-histogram --test-time 180
2525
resources:
2626
requests:
2727
cpus: '4'

redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geopos.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ dbconfig:
99
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
1010
resources:
1111
requests:
12-
memory: 1g
12+
memory: 6g
1313
tested-groups:
1414
- geo
1515
tested-commands:
@@ -21,7 +21,7 @@ build-variants:
2121
clientconfig:
2222
run_image: redislabs/memtier_benchmark:edge
2323
tool: memtier_benchmark
24-
arguments: -c 2 -t 2 --command="GEOPOS key 1" --hide-histogram --test-time 180
24+
arguments: -c 50 -t 4 --command="GEOPOS key 1" --hide-histogram --test-time 180
2525
resources:
2626
requests:
2727
cpus: '4'

redis_benchmarks_specification/test-suites/memtier_benchmark-1key-geo-60M-elements-geosearch-fromlonlat-bybox.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ dbconfig:
99
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
1010
resources:
1111
requests:
12-
memory: 1g
12+
memory: 6g
1313
tested-groups:
1414
- geo
1515
tested-commands:
@@ -21,7 +21,7 @@ build-variants:
2121
clientconfig:
2222
run_image: redislabs/memtier_benchmark:edge
2323
tool: memtier_benchmark
24-
arguments: -c 2 -t 2 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYBOX 200 200 KM" --hide-histogram --test-time 180
24+
arguments: -c 2 -t 4 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYBOX 200 200 KM" --hide-histogram --test-time 180
2525
resources:
2626
requests:
2727
cpus: '4'

0 commit comments

Comments
 (0)