import json
import os


def prepare_vector_db_benchmark_parameters(
    clientconfig,
    full_benchmark_path,
    port,
    server,
    password=None,
    client_mnt_point=None,
):
    """Build the shell command that runs vector-db-benchmark in the client container.

    Args:
        clientconfig: Mapping with the client section of the test definition.
            ``engines`` (or, as a fallback, the singular ``engine``) selects the
            value passed to ``--engines``; ``datasets`` selects the value passed
            to ``--datasets``. Missing keys fall back to ``redis-test`` and
            ``glove-100-angular``.
        full_benchmark_path: Command used to start the tool, placed verbatim at
            the start of the command line.
        port: Currently unused; kept so the signature stays compatible.
        server: Host name passed to ``--host``.
        password: Currently unused; kept so the signature stays compatible.
        client_mnt_point: Path prefix the ``/code/results`` directory is moved
            to once the benchmark succeeds. When ``None`` the move step is
            omitted, so callers that only pass the first four arguments get a
            runnable command instead of a ``TypeError``.

    Returns:
        A ``(None, command_string)`` tuple; the first element is always
        ``None``.
    """
    # Accept both spellings of the key: test suites that declare ``engine``
    # would otherwise silently run with the default engine.
    engines = clientconfig.get("engines", clientconfig.get("engine", "redis-test"))
    datasets = clientconfig.get("datasets", "glove-100-angular")

    benchmark_command = [
        full_benchmark_path,
        "--host",
        f"{server}",
        "--engines",
        f"{engines}",
        "--datasets",
        f"{datasets}",
    ]
    shell_script = "ITERATIONS=1 " + " ".join(benchmark_command)

    if client_mnt_point is not None:
        # The trailing "." is appended directly to the mount point.
        # NOTE(review): presumably client_mnt_point ends with "/" - confirm.
        shell_script = f"{shell_script} && mv /code/results {client_mnt_point}."

    return None, f"bash -c '{shell_script}'"


def post_process_vector_db(temporary_dir):
    """Collect metrics from the JSON files under ``<temporary_dir>/results``.

    Files whose name contains ``upload`` provide ``upload_time``; every other
    file provides ``rps``, ``precision`` (read from ``mean_precisions``) and
    ``total_time``. Each value is read from the ``results`` object of the file.

    Entries are visited in sorted name order so that, when several files carry
    the same metric, the one that wins is deterministic (the last name in
    sorted order). Entries that are not regular files are skipped; a nested
    directory would otherwise abort the run when opened.

    Args:
        temporary_dir: Directory that contains the ``results`` directory.

    Returns:
        Dict with whichever of ``upload_time``, ``rps``, ``precision`` and
        ``total_time`` were found.

    Raises:
        FileNotFoundError: If the ``results`` directory does not exist.
        KeyError: If a result file lacks one of the expected fields.
        json.JSONDecodeError: If a result file is not valid JSON.
    """
    results_dir = os.path.join(temporary_dir, "results")
    results = {}
    for entry_name in sorted(os.listdir(results_dir)):
        entry_path = os.path.join(results_dir, entry_name)
        if not os.path.isfile(entry_path):
            continue
        with open(entry_path, "r") as f:
            payload = json.load(f)["results"]
        if "upload" in entry_name:
            results["upload_time"] = payload["upload_time"]
        else:
            results["rps"] = payload["rps"]
            results["precision"] = payload["mean_precisions"]
            results["total_time"] = payload["total_time"]
    return results
b/redis_benchmarks_specification/__self_contained_coordinator__/runners.py @@ -56,6 +56,7 @@ ) from redis_benchmarks_specification.__self_contained_coordinator__.clients import ( prepare_memtier_benchmark_parameters, + prepare_vector_db_benchmark_parameters, ) from redis_benchmarks_specification.__self_contained_coordinator__.cpuset import ( extract_db_cpu_limit, @@ -347,9 +348,12 @@ def process_self_contained_coordinator_stream( # backwards compatible if benchmark_tool is None: benchmark_tool = "redis-benchmark" - full_benchmark_path = "/usr/local/bin/{}".format( - benchmark_tool - ) + if benchmark_tool == "vector_db_benchmark": + full_benchmark_path = "python /code/run.py" + else: + full_benchmark_path = "/usr/local/bin/{}".format( + benchmark_tool + ) # setup the benchmark ( @@ -370,32 +374,42 @@ def process_self_contained_coordinator_stream( local_benchmark_output_filename ) ) - if "memtier_benchmark" not in benchmark_tool: - # prepare the benchmark command + if "memtier_benchmark" in benchmark_tool: ( - benchmark_command, + _, benchmark_command_str, - ) = prepare_benchmark_parameters( - benchmark_config, + ) = prepare_memtier_benchmark_parameters( + benchmark_config["clientconfig"], full_benchmark_path, redis_proc_start_port, "localhost", local_benchmark_output_filename, - False, benchmark_tool_workdir, - False, ) - else: + elif "vector_db_benchmark" in benchmark_tool: ( _, benchmark_command_str, - ) = prepare_memtier_benchmark_parameters( + ) = prepare_vector_db_benchmark_parameters( benchmark_config["clientconfig"], full_benchmark_path, redis_proc_start_port, "localhost", + ) + else: + # prepare the benchmark command + ( + benchmark_command, + benchmark_command_str, + ) = prepare_benchmark_parameters( + benchmark_config, + full_benchmark_path, + redis_proc_start_port, + "localhost", local_benchmark_output_filename, + False, benchmark_tool_workdir, + False, ) client_container_image = extract_client_container_image( diff --git 
a/redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py b/redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py index 2387f98c..45501952 100644 --- a/redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py +++ b/redis_benchmarks_specification/__self_contained_coordinator__/self_contained_coordinator.py @@ -7,6 +7,7 @@ import traceback import re import docker +import docker.errors import redis import os from pathlib import Path @@ -14,6 +15,9 @@ import time from docker.models.containers import Container +from redis_benchmarks_specification.__self_contained_coordinator__.post_processing import ( + post_process_vector_db, +) from redisbench_admin.profilers.profilers_local import ( check_compatible_system_and_kernel_and_prepare_profile, ) @@ -107,6 +111,9 @@ extract_db_cpu_limit, generate_cpuset_cpus, ) +from redis_benchmarks_specification.__self_contained_coordinator__.clients import ( + prepare_vector_db_benchmark_parameters, +) from redis_benchmarks_specification.__self_contained_coordinator__.docker import ( generate_standalone_redis_server_args, ) @@ -364,6 +371,7 @@ def self_contained_coordinator_blocking_read( count=1, block=0, ) + logging.info(f"New test info: {newTestInfo}") if len(newTestInfo[0]) < 2 or len(newTestInfo[0][1]) < 1: stream_id = ">" else: @@ -549,7 +557,7 @@ def process_self_contained_coordinator_stream( ) new_executable = f"{mnt_point}{server_name}-server" logging.info( - "changing executable from {executable} to {new_executable}" + f"changing executable from {executable} to {new_executable}" ) executable = new_executable @@ -906,9 +914,12 @@ def process_self_contained_coordinator_stream( # backwards compatible if benchmark_tool is None: benchmark_tool = "redis-benchmark" - full_benchmark_path = "/usr/local/bin/{}".format( - benchmark_tool - ) + if benchmark_tool == "vector_db_benchmark": + full_benchmark_path = "python /code/run.py" 
+ else: + full_benchmark_path = "/usr/local/bin/{}".format( + benchmark_tool + ) # setup the benchmark ( @@ -929,41 +940,53 @@ def process_self_contained_coordinator_stream( local_benchmark_output_filename ) ) - if "memtier_benchmark" not in benchmark_tool: + if "memtier_benchmark" in benchmark_tool: # prepare the benchmark command ( - benchmark_command, + _, benchmark_command_str, - ) = prepare_benchmark_parameters( - benchmark_config, + arbitrary_command, + ) = prepare_memtier_benchmark_parameters( + benchmark_config["clientconfig"], full_benchmark_path, redis_proc_start_port, "localhost", + None, local_benchmark_output_filename, False, - benchmark_tool_workdir, False, + False, + None, + None, + None, + None, + override_test_time, ) - else: + elif "vector_db_benchmark" in benchmark_tool: ( _, benchmark_command_str, - arbitrary_command, - ) = prepare_memtier_benchmark_parameters( + ) = prepare_vector_db_benchmark_parameters( benchmark_config["clientconfig"], full_benchmark_path, redis_proc_start_port, "localhost", None, + client_mnt_point, + ) + else: + ( + benchmark_command, + benchmark_command_str, + ) = prepare_benchmark_parameters( + benchmark_config, + full_benchmark_path, + redis_proc_start_port, + "localhost", local_benchmark_output_filename, False, + benchmark_tool_workdir, False, - False, - None, - None, - None, - None, - override_test_time, ) client_container_image = extract_client_container_image( @@ -995,23 +1018,37 @@ def process_self_contained_coordinator_stream( ) # run the benchmark benchmark_start_time = datetime.datetime.now() - - client_container_stdout = docker_client.containers.run( - image=client_container_image, - volumes={ - temporary_dir_client: { - "bind": client_mnt_point, - "mode": "rw", - }, - }, - auto_remove=True, - privileged=True, - working_dir=benchmark_tool_workdir, - command=benchmark_command_str, - network_mode="host", - detach=False, - cpuset_cpus=client_cpuset_cpus, - ) + try: + client_container_stdout = ( + 
docker_client.containers.run( + image=client_container_image, + volumes={ + temporary_dir_client: { + "bind": client_mnt_point, + "mode": "rw", + }, + }, + auto_remove=True, + privileged=True, + working_dir=benchmark_tool_workdir, + command=benchmark_command_str, + network_mode="host", + detach=False, + cpuset_cpus=client_cpuset_cpus, + ) + ) + except docker.errors.ContainerError as e: + logging.info( + "stdout: {}".format( + e.container.logs(stdout=True) + ) + ) + logging.info( + "stderr: {}".format( + e.container.logs(stderr=True) + ) + ) + raise e benchmark_end_time = datetime.datetime.now() benchmark_duration_seconds = ( @@ -1133,39 +1170,44 @@ def process_self_contained_coordinator_stream( and git_timestamp_ms is not None ): datapoint_time_ms = git_timestamp_ms - post_process_benchmark_results( - benchmark_tool, - local_benchmark_output_filename, - datapoint_time_ms, - start_time_str, - client_container_stdout, - None, - ) - full_result_path = local_benchmark_output_filename - if "memtier_benchmark" in benchmark_tool: - full_result_path = "{}/{}".format( - temporary_dir_client, + if "vector_db_benchmark" in benchmark_tool: + results_dict = post_process_vector_db( + temporary_dir_client + ) + else: + post_process_benchmark_results( + benchmark_tool, local_benchmark_output_filename, + datapoint_time_ms, + start_time_str, + client_container_stdout, + None, ) - logging.info( - "Reading results json from {}".format( - full_result_path + full_result_path = local_benchmark_output_filename + if "memtier_benchmark" in benchmark_tool: + full_result_path = "{}/{}".format( + temporary_dir_client, + local_benchmark_output_filename, + ) + logging.info( + "Reading results json from {}".format( + full_result_path + ) ) - ) - with open( - full_result_path, - "r", - ) as json_file: - results_dict = json.load(json_file) - print_results_table_stdout( - benchmark_config, - default_metrics, - results_dict, - setup_type, - test_name, - None, - ) + with open( + full_result_path, + 
"r", + ) as json_file: + results_dict = json.load(json_file) + print_results_table_stdout( + benchmark_config, + default_metrics, + results_dict, + setup_type, + test_name, + None, + ) dataset_load_duration_seconds = 0 try: diff --git a/redis_benchmarks_specification/setups/builders/gcc:10.5.0-amd64-debian-bullseye-redisearch.yml b/redis_benchmarks_specification/setups/builders/gcc:10.5.0-amd64-debian-bullseye-redisearch.yml new file mode 100644 index 00000000..4a24fb5b --- /dev/null +++ b/redis_benchmarks_specification/setups/builders/gcc:10.5.0-amd64-debian-bullseye-redisearch.yml @@ -0,0 +1,24 @@ +version: 0.1 +id: gcc:10.5.0-amd64-debian-bullseye-redisearch +os: debian-bullseye +arch: amd64 +compiler: "gcc" +cpp_compiler: "g++" +kind: docker +build_image: gcc:10.5.0-bullseye +run_image: debian:bullseye +description: "Using GNU Compiler Containers (https://hub.docker.com/_/gcc?tab=description) + pre-configured environment with all the tools required to build with gcc." +build_artifacts: + - redisearch.so + - redis-server +build_command: "sh -c 'apt update && apt install python3-pip -y && BUILD_WITH_MODULES=yes make --ignore-errors -j && cd modules/redisearch/src && make setup && make build && cd -'" +metadata: + compiler: "gcc" + compiler_version: "10.5.0" + os: debian-bullseye + arch: amd64 + +env: + REDIS_CFLAGS: "-g -fno-omit-frame-pointer" + diff --git a/redis_benchmarks_specification/setups/topologies/topologies.yml b/redis_benchmarks_specification/setups/topologies/topologies.yml index 59e9d610..61d8cecd 100644 --- a/redis_benchmarks_specification/setups/topologies/topologies.yml +++ b/redis_benchmarks_specification/setups/topologies/topologies.yml @@ -10,6 +10,17 @@ spec: cpus: "1" memory: "10g" + - name: oss-standalone-with-redisearch + type: oss-standalone + redis_topology: + primaries: 1 + replicas: 0 + redis_arguments: --loadmodule /mnt/redis/redisearch.so + resources: + requests: + cpus: "1" + memory: "10g" + - name: oss-standalone-02-io-threads 
type: oss-standalone redis_topology: diff --git a/redis_benchmarks_specification/test-suites/vector_db_benchmark_test.yml b/redis_benchmarks_specification/test-suites/vector_db_benchmark_test.yml new file mode 100644 index 00000000..cfc72475 --- /dev/null +++ b/redis_benchmarks_specification/test-suites/vector_db_benchmark_test.yml @@ -0,0 +1,43 @@ +version: 0.4 +name: vector_db_benchmark_test +description: Test if vector-db-benchmark can be run with this tool +dbconfig: + configuration-parameters: + save: '""' + check: + keyspacelen: 0 + resources: + requests: + memory: 1g +tested-groups: +- redisearch +tested-commands: +- search +redis-topologies: +- oss-standalone-with-redisearch +build-variants: +- gcc:10.5.0-amd64-debian-bullseye-redisearch +- dockerhub +clientconfig: + run_image: vector-db-benchmark:test + tool: vector_db_benchmark + datasets: laion-img-emb-512-1M-cosine + engine: redis-hnsw-m-16-ef-128 + #arguments: '"--data-size" "100" --command "LPUSH __key__ __data__" --command-key-pattern="P" --key-minimum=1 --key-maximum 1000000 --test-time 180 -c 50 -t 4 --hide-histogram' + resources: + requests: + cpus: '4' + memory: 2g + +exporter: + redistimeseries: + break_by: + - version + - commit + metrics: + - upload_time + - total_time + - rps + - precision + +priority: 38