diff --git a/script/testing/oltpbench/constants.py b/script/testing/oltpbench/constants.py
index 3d6fa97849..15b6797c8c 100755
--- a/script/testing/oltpbench/constants.py
+++ b/script/testing/oltpbench/constants.py
@@ -3,11 +3,15 @@
 from ..util.constants import DIR_TMP
 
 # git settings for OLTPBench.
-OLTPBENCH_GIT_URL = "https://github.com/oltpbenchmark/oltpbench.git"
-OLTPBENCH_GIT_LOCAL_PATH = os.path.join(DIR_TMP, "oltpbench")
+OLTPBENCH_VERSION = "benchbase-2021-SNAPSHOT"
+OLTPBENCH_GIT_URL = "https://github.com/cmu-db/benchbase.git"
+OLTPBENCH_GIT_LOCAL_PATH = os.path.join(DIR_TMP, "benchbase")
+OLTPBENCH_GIT_TARGET_PATH = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "target")
+OLTPBENCH_GIT_FINAL_PATH = os.path.join(OLTPBENCH_GIT_TARGET_PATH, OLTPBENCH_VERSION)
 OLTPBENCH_GIT_CLEAN_COMMAND = "rm -rf {}".format(OLTPBENCH_GIT_LOCAL_PATH)
-OLTPBENCH_GIT_CLONE_COMMAND = "git clone --depth 1 {} {}".format(OLTPBENCH_GIT_URL,
-                                                                 OLTPBENCH_GIT_LOCAL_PATH)
+OLTPBENCH_GIT_CLONE_COMMAND = "git clone --depth 1 {} {}".format(
+    OLTPBENCH_GIT_URL,
+    OLTPBENCH_GIT_LOCAL_PATH)
 
 # OLTPBench default settings.
 OLTPBENCH_DEFAULT_TIME = 30
@@ -21,7 +25,7 @@
 OLTPBENCH_DEFAULT_DBTYPE = "noisepage"
 OLTPBENCH_DEFAULT_DRIVER = "org.postgresql.Driver"
 OLTPBENCH_DEFAULT_RATE = "unlimited"
-OLTPBENCH_DEFAULT_BIN = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "oltpbenchmark")
+OLTPBENCH_DEFAULT_BIN = "java -jar benchbase.jar "
 OLTPBENCH_DEFAULT_DATABASE_RESTART = True
 OLTPBENCH_DEFAULT_DATABASE_CREATE = True
 OLTPBENCH_DEFAULT_DATABASE_LOAD = True
@@ -30,17 +34,8 @@
 OLTPBENCH_DEFAULT_WAL_ENABLE = True
 OLTPBENCH_DEFAULT_CONTINUE_ON_ERROR = False
 
-OLTPBENCH_DIR_CONFIG = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "config")
-OLTPBENCH_DIR_TEST_RESULT = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "results")
-
-# ant commands for invoking OLTPBench.
-OLTPBENCH_ANT_BUILD_FILE = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "build.xml")
-OLTPBENCH_ANT_COMMANDS = [
-    "ant bootstrap -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
-    "ant resolve -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
-    "ant clean -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
-    "ant build -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
-]
+OLTPBENCH_DIR_CONFIG = os.path.join(OLTPBENCH_GIT_FINAL_PATH, "config", "noisepage")
+OLTPBENCH_DIR_TEST_RESULT = os.path.join(OLTPBENCH_GIT_FINAL_PATH, "results")
 
 # API endpoints for Performance Storage Service
 # Each pair represents different environment. One could choose where the benchmark testing result will be uploaded to
diff --git a/script/testing/oltpbench/test_case_oltp.py b/script/testing/oltpbench/test_case_oltp.py
index 74bcba480c..30ec0632fa 100644
--- a/script/testing/oltpbench/test_case_oltp.py
+++ b/script/testing/oltpbench/test_case_oltp.py
@@ -79,15 +79,15 @@ def _init_test_case(self):
         self.test_output_file = os.path.join(self.test_result_dir, "oltpbench.log")
 
-        # oltpbench historgrams results - json format
+        # oltpbench histograms results - json format
         self.test_histograms_json_file = self.args.get("test_json_histograms")
         if not self.test_histograms_json_file:
             self.test_histograms_json_file = "oltp_histograms_" + self.filename_suffix + ".json"
         self.test_histogram_path = os.path.join(
-            constants.OLTPBENCH_GIT_LOCAL_PATH, self.test_histograms_json_file)
+            constants.OLTPBENCH_GIT_FINAL_PATH, self.test_histograms_json_file)
 
         # oltpbench initiate database and load data
-        self.oltp_flag = "--histograms --execute={EXECUTE} -s {BUCKETS}".format(
+        self.oltp_flag = "--execute={EXECUTE} -s {BUCKETS}".format(
             EXECUTE=self.db_execute, BUCKETS=self.buckets)
 
         # oltpbench test command
@@ -98,7 +98,7 @@ def _init_test_case(self):
             XML=self.xml_config,
             FLAGS=self.oltp_flag,
             HISTOGRAMS=self.test_histogram_path)
-        self.test_command_cwd = constants.OLTPBENCH_GIT_LOCAL_PATH
+        self.test_command_cwd = constants.OLTPBENCH_GIT_FINAL_PATH
 
     def run_pre_test(self):
         self._config_xml_file()
@@ -149,9 +149,9 @@ def _get_db_url(self):
     def _config_xml_file(self):
         xml = ElementTree.parse(self.xml_template)
         root = xml.getroot()
-        root.find("dbtype").text = constants.OLTPBENCH_DEFAULT_DBTYPE
+        root.find("type").text = constants.OLTPBENCH_DEFAULT_DBTYPE
         root.find("driver").text = constants.OLTPBENCH_DEFAULT_DRIVER
-        root.find("DBUrl").text = self._get_db_url()
+        root.find("url").text = self._get_db_url()
         root.find("username").text = constants.OLTPBENCH_DEFAULT_USERNAME
         root.find("password").text = constants.OLTPBENCH_DEFAULT_PASSWORD
         root.find("isolation").text = str(self.transaction_isolation)
@@ -199,9 +199,7 @@ def _validate_result(self):
         with open(self.test_histogram_path) as oltp_result_file:
             test_result = json.load(oltp_result_file)
         unexpected_result = test_result.get("unexpected", {}).get("HISTOGRAM")
-        if unexpected_result and unexpected_result.keys():
+        if unexpected_result:
             for test in unexpected_result.keys():
                 if unexpected_result[test] != 0:
                     raise RuntimeError(str(unexpected_result))
-        else:
-            raise RuntimeError(str(unexpected_result))
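For reference, a minimal runnable sketch of the ElementTree rewrite that _config_xml_file() now performs against BenchBase's renamed config elements (dbtype -> type, DBUrl -> url). The inline template and both connection URLs are illustrative placeholders, not the real BenchBase sample config:

    # Sketch only: the template and URLs below are invented placeholders.
    import xml.etree.ElementTree as ElementTree

    TEMPLATE = """<parameters>
        <type>postgres</type>
        <driver>org.postgresql.Driver</driver>
        <url>jdbc:postgresql://localhost:5432/benchbase</url>
        <username>admin</username>
        <password>password</password>
        <isolation>TRANSACTION_SERIALIZABLE</isolation>
    </parameters>"""

    root = ElementTree.fromstring(TEMPLATE)
    # BenchBase renamed the old OLTPBench elements: dbtype -> type, DBUrl -> url.
    root.find("type").text = "noisepage"
    root.find("driver").text = "org.postgresql.Driver"
    root.find("url").text = "jdbc:postgresql://localhost:15721/noisepage"  # placeholder URL
    print(ElementTree.tostring(root, encoding="unicode"))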
""" - for command in constants.OLTPBENCH_ANT_COMMANDS: - expect_command(command) + old_dir = os.getcwd() + os.chdir(constants.OLTPBENCH_GIT_LOCAL_PATH) + # --no-transfer-progress: don't show download progress, too noisy + # -Dmaven.test.skip=true: we're not in the business of testing BenchBase, we just want to use it + expect_command("./mvnw package --no-transfer-progress -Dmaven.test.skip=true") + os.chdir(constants.OLTPBENCH_GIT_TARGET_PATH) + expect_command(f"tar xvzf {constants.OLTPBENCH_VERSION}.tgz") + os.chdir(old_dir) diff --git a/script/testing/reporting/constants.py b/script/testing/reporting/constants.py index 174c49265a..ccda9a0f7c 100644 --- a/script/testing/reporting/constants.py +++ b/script/testing/reporting/constants.py @@ -1,6 +1 @@ UNKNOWN_RESULT = 'unknown' -LATENCY_ATTRIBUTE_MAPPING = [ - # key = key in publish result json, value= string to search OLTPBench results for - # TODO(WAN): this mapping could probably be a.. map? {}? - ('l_25', '25'), ('l_75', '75'), ('l_90', '90'), ('l_95', '95'), ('l_99', '99'), - ('avg', 'av'), ('median', 'median'), ('min', 'min'), ('max', 'max')] diff --git a/script/testing/reporting/parsers/oltpbench/res_parser.py b/script/testing/reporting/parsers/oltpbench/res_parser.py index ee26470c09..039b15fd2d 100644 --- a/script/testing/reporting/parsers/oltpbench/res_parser.py +++ b/script/testing/reporting/parsers/oltpbench/res_parser.py @@ -1,6 +1,5 @@ import csv -from ...constants import LATENCY_ATTRIBUTE_MAPPING from ...utils import get_value_by_pattern @@ -29,10 +28,20 @@ def get_latency_val(row, pattern): reader = csv.DictReader(csvfile, delimiter=',') for row in reader: incremental_metrics.append({ - "time": float(gvbp(row, 'time', None)), - "throughput": float(gvbp(row, 'throughput', None)), + "time": float(gvbp(row, 'time(sec)', None)), + "throughput": float(gvbp(row, 'throughput(req/sec)', None)), "latency": {key: get_latency_val(row, pat) - for key, pat in LATENCY_ATTRIBUTE_MAPPING} + for key, pat in [ + ('l_25', '25th_lat(ms)'), + ('l_75', '75th_lat(ms)'), + ('l_90', '90th_lat(ms)'), + ('l_95', '95th_lat(ms)'), + ('l_99', '99th_lat(ms)'), + ('avg', 'avg_lat(ms)'), + ('median', 'median_lat(ms)'), + ('min', 'min_lat(ms)'), + ('max', 'max_lat(ms)') + ]} }) return incremental_metrics diff --git a/script/testing/reporting/parsers/oltpbench/summary_parser.py b/script/testing/reporting/parsers/oltpbench/summary_parser.py index 39e5c35b3c..ec49dd3370 100644 --- a/script/testing/reporting/parsers/oltpbench/summary_parser.py +++ b/script/testing/reporting/parsers/oltpbench/summary_parser.py @@ -1,7 +1,7 @@ import json from time import time -from ...constants import LATENCY_ATTRIBUTE_MAPPING, UNKNOWN_RESULT +from ...constants import UNKNOWN_RESULT from ...utils import get_value_by_pattern @@ -30,7 +30,7 @@ def parse_summary_file(path): """ def get_latency_val(latency_dist, pattern): value = get_value_by_pattern(latency_dist, pattern, None) - return float("{:.4}".format(value)) if value else value + return float("{:.4}".format(float(value))) if value else value with open(path) as summary_file: summary = json.load(summary_file) @@ -38,19 +38,29 @@ def get_latency_val(latency_dist, pattern): metadata = { 'noisepage': { - 'db_version': summary.get('DBMS Version', UNKNOWN_RESULT) + 'db_version': '1.0.0' } } - timestamp = int(get_value_by_pattern(summary, 'timestamp', str(time()))) + timestamp = int(get_value_by_pattern(summary, 'Current Timestamp (milliseconds)', str(time()))) benchmark_type = summary.get('Benchmark Type', UNKNOWN_RESULT) parameters 
diff --git a/script/testing/reporting/parsers/oltpbench/summary_parser.py b/script/testing/reporting/parsers/oltpbench/summary_parser.py
index 39e5c35b3c..ec49dd3370 100644
--- a/script/testing/reporting/parsers/oltpbench/summary_parser.py
+++ b/script/testing/reporting/parsers/oltpbench/summary_parser.py
@@ -1,7 +1,7 @@
 import json
 from time import time
 
-from ...constants import LATENCY_ATTRIBUTE_MAPPING, UNKNOWN_RESULT
+from ...constants import UNKNOWN_RESULT
 from ...utils import get_value_by_pattern
 
 
@@ -30,7 +30,7 @@ def parse_summary_file(path):
     """
     def get_latency_val(latency_dist, pattern):
         value = get_value_by_pattern(latency_dist, pattern, None)
-        return float("{:.4}".format(value)) if value else value
+        return float("{:.4}".format(float(value))) if value else value
 
     with open(path) as summary_file:
         summary = json.load(summary_file)
@@ -38,19 +38,29 @@ def parse_summary_file(path):
 
     metadata = {
         'noisepage': {
-            'db_version': summary.get('DBMS Version', UNKNOWN_RESULT)
+            'db_version': '1.0.0'
         }
     }
-    timestamp = int(get_value_by_pattern(summary, 'timestamp', str(time())))
+    timestamp = int(get_value_by_pattern(summary, 'Current Timestamp (milliseconds)', str(time())))
     benchmark_type = summary.get('Benchmark Type', UNKNOWN_RESULT)
     parameters = {
         'scale_factor': summary.get('scalefactor', '-1.0'),
         'terminals': int(summary.get('terminals', -1))
     }
     metrics = {
-        'throughput': get_value_by_pattern(summary, 'throughput', '-1.0'),
+        'throughput': get_value_by_pattern(summary, 'Throughput (requests/second)', '-1.0'),
         'latency': {key: get_latency_val(latency_dist, pattern)
-                    for key, pattern in LATENCY_ATTRIBUTE_MAPPING}
+                    for key, pattern in [
+                        ('l_25', '25th Percentile Latency (microseconds)'),
+                        ('l_75', '75th Percentile Latency (microseconds)'),
+                        ('l_90', '90th Percentile Latency (microseconds)'),
+                        ('l_95', '95th Percentile Latency (microseconds)'),
+                        ('l_99', '99th Percentile Latency (microseconds)'),
+                        ('avg', 'Average Latency (microseconds)'),
+                        ('median', 'Median Latency (microseconds)'),
+                        ('min', 'Minimum Latency (microseconds)'),
+                        ('max', 'Maximum Latency (microseconds)')
+                    ]}
     }
 
     return metadata, timestamp, benchmark_type, parameters, metrics
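For reference, a sketch of why the float() cast was added to get_latency_val(). BenchBase reports latencies in whole microseconds, which can arrive as Python ints after json.load, and "{:.4}" raises ValueError on ints ("Precision not allowed in integer format specifier"); with the cast, it rounds floats to four significant digits. The sample dict below is invented, and the lookup is simplified to dict.get() in place of get_value_by_pattern():

    # Sketch only: sample values invented; pattern lookup simplified.
    def get_latency_val(latency_dist, pattern):
        value = latency_dist.get(pattern)
        return float("{:.4}".format(float(value))) if value else value

    sample_dist = {
        "Average Latency (microseconds)": 9876,        # int: would crash without the cast
        "Median Latency (microseconds)": 12345.678,
    }

    print(get_latency_val(sample_dist, "Average Latency (microseconds)"))  # 9876.0
    print(get_latency_val(sample_dist, "Median Latency (microseconds)"))   # 12350.0 (4 significant digits)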
diff --git a/script/testing/reporting/parsers/parse_data.py b/script/testing/reporting/parsers/parse_data.py
index 743e8265e7..784cd7f2ca 100644
--- a/script/testing/reporting/parsers/parse_data.py
+++ b/script/testing/reporting/parsers/parse_data.py
@@ -1,3 +1,4 @@
+import glob
 import os
 import re
 from decimal import Decimal
@@ -130,6 +131,24 @@ def parse_oltpbench_files(results_dir):
     metrics : dict
         The summary measurements that were gathered from the test.
     """
+
+    def hack_rename(old_glob_target, new_name):
+        """
+        Wan wants to avoid a rabbit hole of refactoring.
+        Therefore the new OLTPBench files are being renamed to match old expectations here.
+        """
+        matches = glob.glob(old_glob_target)
+        assert len(matches) == 1
+        os.rename(matches[0], new_name)
+
+    hack_rename(f'{results_dir}/*.results.csv', f'{results_dir}/oltpbench.res')
+    hack_rename(f'{results_dir}/*.raw.csv', f'{results_dir}/oltpbench.csv')
+    hack_rename(f'{results_dir}/*.samples.csv', f'{results_dir}/oltpbench.samples')
+    hack_rename(f'{results_dir}/*.summary.json', f'{results_dir}/oltpbench.summary')
+    hack_rename(f'{results_dir}/*.params.json', f'{results_dir}/oltpbench.params')
+    hack_rename(f'{results_dir}/*.metrics.json', f'{results_dir}/oltpbench.metrics')
+    hack_rename(f'{results_dir}/*.config.xml', f'{results_dir}/oltpbench.expconfig')
+
     config_parameters = parse_config_file(results_dir + '/oltpbench.expconfig')
     metadata, timestamp, benchmark_type, summary_parameters, metrics = parse_summary_file(
         results_dir + '/oltpbench.summary')
@@ -168,22 +187,10 @@ def _parse_db_metadata():
 
     Warnings
     --------
-    Giant hack that parses a hardcoded constant NOISEPAGE_VERSION
-    in src/include/common/version.h.
+    Giant hack that hardcodes version number.
     If the hack is unsuccessful, it defaults to UNKNOWN_RESULT.
     """
-    regex = r"NOISEPAGE_VERSION[=\s].*(\d.\d.\d)"
-    curr_dir = os.path.dirname(os.path.realpath(__file__))
-    # TODO(WAN): Don't do this. We support SELECT VERSION(), do that instead.
-    version_file_relative = '../../../../src/include/common/version.h'
-    version_file = os.path.join(curr_dir, version_file_relative)
-    db_metadata = {'noisepage': {'db_version': UNKNOWN_RESULT}}
-    try:
-        with open(version_file) as f:
-            match = re.search(regex, f.read())
-            db_metadata['noisepage']['db_version'] = match.group(1)
-    except Exception as err:
-        LOG.error(err)
-
-    return db_metadata
+    return {'noisepage': {'db_version': '1.0.0'}}
+
+
diff --git a/script/testing/reporting/report_result.py b/script/testing/reporting/report_result.py
index ccc1ef04a3..d7db5b573f 100644
--- a/script/testing/reporting/report_result.py
+++ b/script/testing/reporting/report_result.py
@@ -119,6 +119,7 @@ def _send_result(env, path, username, password, result):
     """
     url = f"{PERFORMANCE_STORAGE_SERVICE_API.get(env)}{path}"
     LOG.debug(f"Sending results to: {url}")
+    LOG.info(f"Uploading result: {result}")
 
     try:
         result = requests.post(url, json=result, auth=(username, password))
diff --git a/src/include/common/version.h b/src/include/common/version.h
index d14d193bf8..3befce5689 100644
--- a/src/include/common/version.h
+++ b/src/include/common/version.h
@@ -5,6 +5,9 @@ namespace noisepage::common {
 
 constexpr std::string_view NOISEPAGE_NAME = "NoisePage";
 
+// TODO(WAN): There used to be a fragile hack in parse_data.py that would try to regex out the version number.
+// Please update script/testing/reporting/parsers/parse_data.py manually if you change this version number.
+// And also script/testing/reporting/parsers/summary_parser.py.
 constexpr std::string_view NOISEPAGE_VERSION = "1.0.0";
 constexpr std::string_view NOISEPAGE_VERSION_STR = "NoisePage 1.0.0";
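For reference, a runnable sketch of the hack_rename() behavior in parse_data.py: exactly one file may match each glob, and it is renamed to the legacy OLTPBench filename that the downstream parsers still expect. The BenchBase-style timestamped filename below is invented:

    # Sketch only: the matched filename is an invented example.
    import glob
    import os
    import tempfile

    def hack_rename(old_glob_target, new_name):
        matches = glob.glob(old_glob_target)
        assert len(matches) == 1
        os.rename(matches[0], new_name)

    with tempfile.TemporaryDirectory() as results_dir:
        # Stand-in for a BenchBase output file such as "tpcc_<timestamp>.results.csv".
        open(os.path.join(results_dir, "tpcc_2021-06-01_12-00-00.results.csv"), "w").close()
        hack_rename(f"{results_dir}/*.results.csv", f"{results_dir}/oltpbench.res")
        print(os.listdir(results_dir))  # ['oltpbench.res']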