diff --git a/script/testing/oltpbench/constants.py b/script/testing/oltpbench/constants.py
index 3d6fa97849..15b6797c8c 100755
--- a/script/testing/oltpbench/constants.py
+++ b/script/testing/oltpbench/constants.py
@@ -3,11 +3,15 @@
 from ..util.constants import DIR_TMP
 
 # git settings for OLTPBench.
-OLTPBENCH_GIT_URL = "https://github.com/oltpbenchmark/oltpbench.git"
-OLTPBENCH_GIT_LOCAL_PATH = os.path.join(DIR_TMP, "oltpbench")
+OLTPBENCH_VERSION = "benchbase-2021-SNAPSHOT"
+OLTPBENCH_GIT_URL = "https://github.com/cmu-db/benchbase.git"
+OLTPBENCH_GIT_LOCAL_PATH = os.path.join(DIR_TMP, "benchbase")
+OLTPBENCH_GIT_TARGET_PATH = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "target")
+OLTPBENCH_GIT_FINAL_PATH = os.path.join(OLTPBENCH_GIT_TARGET_PATH, OLTPBENCH_VERSION)
 OLTPBENCH_GIT_CLEAN_COMMAND = "rm -rf {}".format(OLTPBENCH_GIT_LOCAL_PATH)
-OLTPBENCH_GIT_CLONE_COMMAND = "git clone --depth 1 {} {}".format(OLTPBENCH_GIT_URL,
-                                                                 OLTPBENCH_GIT_LOCAL_PATH)
+OLTPBENCH_GIT_CLONE_COMMAND = "git clone --depth 1 {} {}".format(
+    OLTPBENCH_GIT_URL,
+    OLTPBENCH_GIT_LOCAL_PATH)
 
 # OLTPBench default settings.
 OLTPBENCH_DEFAULT_TIME = 30
@@ -21,7 +25,7 @@
 OLTPBENCH_DEFAULT_DBTYPE = "noisepage"
 OLTPBENCH_DEFAULT_DRIVER = "org.postgresql.Driver"
 OLTPBENCH_DEFAULT_RATE = "unlimited"
-OLTPBENCH_DEFAULT_BIN = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "oltpbenchmark")
+OLTPBENCH_DEFAULT_BIN = "java -jar benchbase.jar "
 OLTPBENCH_DEFAULT_DATABASE_RESTART = True
 OLTPBENCH_DEFAULT_DATABASE_CREATE = True
 OLTPBENCH_DEFAULT_DATABASE_LOAD = True
@@ -30,17 +34,8 @@
 OLTPBENCH_DEFAULT_WAL_ENABLE = True
 OLTPBENCH_DEFAULT_CONTINUE_ON_ERROR = False
 
-OLTPBENCH_DIR_CONFIG = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "config")
-OLTPBENCH_DIR_TEST_RESULT = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "results")
-
-# ant commands for invoking OLTPBench.
-OLTPBENCH_ANT_BUILD_FILE = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "build.xml")
-OLTPBENCH_ANT_COMMANDS = [
-    "ant bootstrap -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
-    "ant resolve -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
-    "ant clean -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
-    "ant build -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
-]
+OLTPBENCH_DIR_CONFIG = os.path.join(OLTPBENCH_GIT_FINAL_PATH, "config", "noisepage")
+OLTPBENCH_DIR_TEST_RESULT = os.path.join(OLTPBENCH_GIT_FINAL_PATH, "results")
 
 # API endpoints for Performance Storage Service
 # Each pair represents different environment. One could choose where the benchmark testing result will be uploaded to
diff --git a/script/testing/oltpbench/test_case_oltp.py b/script/testing/oltpbench/test_case_oltp.py
index 74bcba480c..30ec0632fa 100644
--- a/script/testing/oltpbench/test_case_oltp.py
+++ b/script/testing/oltpbench/test_case_oltp.py
@@ -79,15 +79,15 @@ def _init_test_case(self):
         self.test_output_file = os.path.join(self.test_result_dir, "oltpbench.log")
 
-        # oltpbench historgrams results - json format
+        # oltpbench histograms results - json format
         self.test_histograms_json_file = self.args.get("test_json_histograms")
         if not self.test_histograms_json_file:
             self.test_histograms_json_file = "oltp_histograms_" + self.filename_suffix + ".json"
         self.test_histogram_path = os.path.join(
-            constants.OLTPBENCH_GIT_LOCAL_PATH, self.test_histograms_json_file)
+            constants.OLTPBENCH_GIT_FINAL_PATH, self.test_histograms_json_file)
 
         # oltpbench initiate database and load data
-        self.oltp_flag = "--histograms --execute={EXECUTE} -s {BUCKETS}".format(
+        self.oltp_flag = "--execute={EXECUTE} -s {BUCKETS}".format(
             EXECUTE=self.db_execute, BUCKETS=self.buckets)
 
         # oltpbench test command
@@ -98,7 +98,7 @@ def _init_test_case(self):
             XML=self.xml_config,
             FLAGS=self.oltp_flag,
             HISTOGRAMS=self.test_histogram_path)
-        self.test_command_cwd = constants.OLTPBENCH_GIT_LOCAL_PATH
+        self.test_command_cwd = constants.OLTPBENCH_GIT_FINAL_PATH
 
     def run_pre_test(self):
         self._config_xml_file()
@@ -149,9 +149,9 @@ def _get_db_url(self):
     def _config_xml_file(self):
         xml = ElementTree.parse(self.xml_template)
         root = xml.getroot()
-        root.find("dbtype").text = constants.OLTPBENCH_DEFAULT_DBTYPE
+        root.find("type").text = constants.OLTPBENCH_DEFAULT_DBTYPE
         root.find("driver").text = constants.OLTPBENCH_DEFAULT_DRIVER
-        root.find("DBUrl").text = self._get_db_url()
+        root.find("url").text = self._get_db_url()
         root.find("username").text = constants.OLTPBENCH_DEFAULT_USERNAME
         root.find("password").text = constants.OLTPBENCH_DEFAULT_PASSWORD
         root.find("isolation").text = str(self.transaction_isolation)
@@ -199,9 +199,7 @@ def _validate_result(self):
         with open(self.test_histogram_path) as oltp_result_file:
             test_result = json.load(oltp_result_file)
         unexpected_result = test_result.get("unexpected", {}).get("HISTOGRAM")
-        if unexpected_result and unexpected_result.keys():
+        if unexpected_result:
             for test in unexpected_result.keys():
                 if unexpected_result[test] != 0:
                     raise RuntimeError(str(unexpected_result))
-        else:
-            raise RuntimeError(str(unexpected_result))
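For reference, a minimal runnable sketch of the ElementTree rewrite that _config_xml_file() now performs against BenchBase's renamed config elements (dbtype -> type, DBUrl -> url). The inline template and both connection URLs are illustrative placeholders, not the real BenchBase sample config:

    # Sketch only: the template and URLs below are invented placeholders.
    import xml.etree.ElementTree as ElementTree

    TEMPLATE = """<parameters>
        <type>postgres</type>
        <driver>org.postgresql.Driver</driver>
        <url>jdbc:postgresql://localhost:5432/benchbase</url>
        <username>admin</username>
        <password>password</password>
        <isolation>TRANSACTION_SERIALIZABLE</isolation>
    </parameters>"""

    root = ElementTree.fromstring(TEMPLATE)
    # BenchBase renamed the old OLTPBench elements: dbtype -> type, DBUrl -> url.
    root.find("type").text = "noisepage"
    root.find("driver").text = "org.postgresql.Driver"
    root.find("url").text = "jdbc:postgresql://localhost:15721/noisepage"  # placeholder URL
    print(ElementTree.tostring(root, encoding="unicode"))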
""" - for command in constants.OLTPBENCH_ANT_COMMANDS: - expect_command(command) + old_dir = os.getcwd() + os.chdir(constants.OLTPBENCH_GIT_LOCAL_PATH) + # --no-transfer-progress: don't show download progress, too noisy + # -Dmaven.test.skip=true: we're not in the business of testing BenchBase, we just want to use it + expect_command("./mvnw package --no-transfer-progress -Dmaven.test.skip=true") + os.chdir(constants.OLTPBENCH_GIT_TARGET_PATH) + expect_command(f"tar xvzf {constants.OLTPBENCH_VERSION}.tgz") + os.chdir(old_dir) diff --git a/script/testing/reporting/constants.py b/script/testing/reporting/constants.py index 174c49265a..ccda9a0f7c 100644 --- a/script/testing/reporting/constants.py +++ b/script/testing/reporting/constants.py @@ -1,6 +1 @@ UNKNOWN_RESULT = 'unknown' -LATENCY_ATTRIBUTE_MAPPING = [ - # key = key in publish result json, value= string to search OLTPBench results for - # TODO(WAN): this mapping could probably be a.. map? {}? - ('l_25', '25'), ('l_75', '75'), ('l_90', '90'), ('l_95', '95'), ('l_99', '99'), - ('avg', 'av'), ('median', 'median'), ('min', 'min'), ('max', 'max')] diff --git a/script/testing/reporting/parsers/oltpbench/res_parser.py b/script/testing/reporting/parsers/oltpbench/res_parser.py index ee26470c09..039b15fd2d 100644 --- a/script/testing/reporting/parsers/oltpbench/res_parser.py +++ b/script/testing/reporting/parsers/oltpbench/res_parser.py @@ -1,6 +1,5 @@ import csv -from ...constants import LATENCY_ATTRIBUTE_MAPPING from ...utils import get_value_by_pattern @@ -29,10 +28,20 @@ def get_latency_val(row, pattern): reader = csv.DictReader(csvfile, delimiter=',') for row in reader: incremental_metrics.append({ - "time": float(gvbp(row, 'time', None)), - "throughput": float(gvbp(row, 'throughput', None)), + "time": float(gvbp(row, 'time(sec)', None)), + "throughput": float(gvbp(row, 'throughput(req/sec)', None)), "latency": {key: get_latency_val(row, pat) - for key, pat in LATENCY_ATTRIBUTE_MAPPING} + for key, pat in [ + ('l_25', '25th_lat(ms)'), + ('l_75', '75th_lat(ms)'), + ('l_90', '90th_lat(ms)'), + ('l_95', '95th_lat(ms)'), + ('l_99', '99th_lat(ms)'), + ('avg', 'avg_lat(ms)'), + ('median', 'median_lat(ms)'), + ('min', 'min_lat(ms)'), + ('max', 'max_lat(ms)') + ]} }) return incremental_metrics diff --git a/script/testing/reporting/parsers/oltpbench/summary_parser.py b/script/testing/reporting/parsers/oltpbench/summary_parser.py index 39e5c35b3c..ec49dd3370 100644 --- a/script/testing/reporting/parsers/oltpbench/summary_parser.py +++ b/script/testing/reporting/parsers/oltpbench/summary_parser.py @@ -1,7 +1,7 @@ import json from time import time -from ...constants import LATENCY_ATTRIBUTE_MAPPING, UNKNOWN_RESULT +from ...constants import UNKNOWN_RESULT from ...utils import get_value_by_pattern @@ -30,7 +30,7 @@ def parse_summary_file(path): """ def get_latency_val(latency_dist, pattern): value = get_value_by_pattern(latency_dist, pattern, None) - return float("{:.4}".format(value)) if value else value + return float("{:.4}".format(float(value))) if value else value with open(path) as summary_file: summary = json.load(summary_file) @@ -38,19 +38,29 @@ def get_latency_val(latency_dist, pattern): metadata = { 'noisepage': { - 'db_version': summary.get('DBMS Version', UNKNOWN_RESULT) + 'db_version': '1.0.0' } } - timestamp = int(get_value_by_pattern(summary, 'timestamp', str(time()))) + timestamp = int(get_value_by_pattern(summary, 'Current Timestamp (milliseconds)', str(time()))) benchmark_type = summary.get('Benchmark Type', UNKNOWN_RESULT) parameters 
diff --git a/script/testing/reporting/parsers/oltpbench/summary_parser.py b/script/testing/reporting/parsers/oltpbench/summary_parser.py
index 39e5c35b3c..ec49dd3370 100644
--- a/script/testing/reporting/parsers/oltpbench/summary_parser.py
+++ b/script/testing/reporting/parsers/oltpbench/summary_parser.py
@@ -1,7 +1,7 @@
 import json
 from time import time
 
-from ...constants import LATENCY_ATTRIBUTE_MAPPING, UNKNOWN_RESULT
+from ...constants import UNKNOWN_RESULT
 from ...utils import get_value_by_pattern
 
 
@@ -30,7 +30,7 @@ def parse_summary_file(path):
     """
     def get_latency_val(latency_dist, pattern):
         value = get_value_by_pattern(latency_dist, pattern, None)
-        return float("{:.4}".format(value)) if value else value
+        return float("{:.4}".format(float(value))) if value else value
 
     with open(path) as summary_file:
         summary = json.load(summary_file)
@@ -38,19 +38,29 @@ def parse_summary_file(path):
 
     metadata = {
         'noisepage': {
-            'db_version': summary.get('DBMS Version', UNKNOWN_RESULT)
+            'db_version': '1.0.0'
         }
     }
-    timestamp = int(get_value_by_pattern(summary, 'timestamp', str(time())))
+    timestamp = int(get_value_by_pattern(summary, 'Current Timestamp (milliseconds)', str(time())))
     benchmark_type = summary.get('Benchmark Type', UNKNOWN_RESULT)
     parameters = {
         'scale_factor': summary.get('scalefactor', '-1.0'),
         'terminals': int(summary.get('terminals', -1))
     }
     metrics = {
-        'throughput': get_value_by_pattern(summary, 'throughput', '-1.0'),
+        'throughput': get_value_by_pattern(summary, 'Throughput (requests/second)', '-1.0'),
         'latency': {key: get_latency_val(latency_dist, pattern)
-                    for key, pattern in LATENCY_ATTRIBUTE_MAPPING}
+                    for key, pattern in [
+                        ('l_25', '25th Percentile Latency (microseconds)'),
+                        ('l_75', '75th Percentile Latency (microseconds)'),
+                        ('l_90', '90th Percentile Latency (microseconds)'),
+                        ('l_95', '95th Percentile Latency (microseconds)'),
+                        ('l_99', '99th Percentile Latency (microseconds)'),
+                        ('avg', 'Average Latency (microseconds)'),
+                        ('median', 'Median Latency (microseconds)'),
+                        ('min', 'Minimum Latency (microseconds)'),
+                        ('max', 'Maximum Latency (microseconds)')
+                    ]}
     }
 
     return metadata, timestamp, benchmark_type, parameters, metrics
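For reference, a sketch of why the float() cast was added to get_latency_val(). BenchBase reports latencies in whole microseconds, which can arrive as Python ints after json.load, and "{:.4}" raises ValueError on ints ("Precision not allowed in integer format specifier"); with the cast, it rounds floats to four significant digits. The sample dict below is invented, and the lookup is simplified to dict.get() in place of get_value_by_pattern():

    # Sketch only: sample values invented; pattern lookup simplified.
    def get_latency_val(latency_dist, pattern):
        value = latency_dist.get(pattern)
        return float("{:.4}".format(float(value))) if value else value

    sample_dist = {
        "Average Latency (microseconds)": 9876,        # int: would crash without the cast
        "Median Latency (microseconds)": 12345.678,
    }

    print(get_latency_val(sample_dist, "Average Latency (microseconds)"))  # 9876.0
    print(get_latency_val(sample_dist, "Median Latency (microseconds)"))   # 12350.0 (4 significant digits)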
diff --git a/script/testing/reporting/parsers/parse_data.py b/script/testing/reporting/parsers/parse_data.py
index 743e8265e7..784cd7f2ca 100644
--- a/script/testing/reporting/parsers/parse_data.py
+++ b/script/testing/reporting/parsers/parse_data.py
@@ -1,3 +1,4 @@
+import glob
 import os
 import re
 from decimal import Decimal
@@ -130,6 +131,24 @@ def parse_oltpbench_files(results_dir):
     metrics : dict
         The summary measurements that were gathered from the test.
     """
+
+    def hack_rename(old_glob_target, new_name):
+        """
+        Wan wants to avoid a rabbit hole of refactoring.
+        Therefore the new OLTPBench files are being renamed to match old expectations here.
+        """
+        matches = glob.glob(old_glob_target)
+        assert len(matches) == 1
+        os.rename(matches[0], new_name)
+
+    hack_rename(f'{results_dir}/*.results.csv', f'{results_dir}/oltpbench.res')
+    hack_rename(f'{results_dir}/*.raw.csv', f'{results_dir}/oltpbench.csv')
+    hack_rename(f'{results_dir}/*.samples.csv', f'{results_dir}/oltpbench.samples')
+    hack_rename(f'{results_dir}/*.summary.json', f'{results_dir}/oltpbench.summary')
+    hack_rename(f'{results_dir}/*.params.json', f'{results_dir}/oltpbench.params')
+    hack_rename(f'{results_dir}/*.metrics.json', f'{results_dir}/oltpbench.metrics')
+    hack_rename(f'{results_dir}/*.config.xml', f'{results_dir}/oltpbench.expconfig')
+
     config_parameters = parse_config_file(results_dir + '/oltpbench.expconfig')
     metadata, timestamp, benchmark_type, summary_parameters, metrics = parse_summary_file(
         results_dir + '/oltpbench.summary')
@@ -168,22 +187,10 @@ def _parse_db_metadata():
 
     Warnings
     --------
-    Giant hack that parses a hardcoded constant NOISEPAGE_VERSION
-    in src/include/common/version.h.
+    Giant hack that hardcodes version number.
     If the hack is unsuccessful, it defaults to UNKNOWN_RESULT.
     """
-    regex = r"NOISEPAGE_VERSION[=\s].*(\d.\d.\d)"
-    curr_dir = os.path.dirname(os.path.realpath(__file__))
-    # TODO(WAN): Don't do this. We support SELECT VERSION(), do that instead.
-    version_file_relative = '../../../../src/include/common/version.h'
-    version_file = os.path.join(curr_dir, version_file_relative)
-    db_metadata = {'noisepage': {'db_version': UNKNOWN_RESULT}}
-    try:
-        with open(version_file) as f:
-            match = re.search(regex, f.read())
-            db_metadata['noisepage']['db_version'] = match.group(1)
-    except Exception as err:
-        LOG.error(err)
-
-    return db_metadata
+    return {'noisepage': {'db_version': '1.0.0'}}
+
+
diff --git a/script/testing/reporting/report_result.py b/script/testing/reporting/report_result.py
index ccc1ef04a3..d7db5b573f 100644
--- a/script/testing/reporting/report_result.py
+++ b/script/testing/reporting/report_result.py
@@ -119,6 +119,7 @@ def _send_result(env, path, username, password, result):
     """
     url = f"{PERFORMANCE_STORAGE_SERVICE_API.get(env)}{path}"
     LOG.debug(f"Sending results to: {url}")
+    LOG.info(f"Uploading result: {result}")
 
     try:
         result = requests.post(url, json=result, auth=(username, password))
diff --git a/src/include/common/version.h b/src/include/common/version.h
index d14d193bf8..3befce5689 100644
--- a/src/include/common/version.h
+++ b/src/include/common/version.h
@@ -5,6 +5,9 @@ namespace noisepage::common {
 
 constexpr std::string_view NOISEPAGE_NAME = "NoisePage";
 
+// TODO(WAN): There used to be a fragile hack in parse_data.py that would try to regex out the version number.
+// Please update script/testing/reporting/parsers/parse_data.py manually if you change this version number.
+// And also script/testing/reporting/parsers/summary_parser.py.
 constexpr std::string_view NOISEPAGE_VERSION = "1.0.0";
 constexpr std::string_view NOISEPAGE_VERSION_STR = "NoisePage 1.0.0";
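For reference, a runnable sketch of the hack_rename() behavior in parse_data.py: exactly one file may match each glob, and it is renamed to the legacy OLTPBench filename that the downstream parsers still expect. The BenchBase-style timestamped filename below is invented:

    # Sketch only: the matched filename is an invented example.
    import glob
    import os
    import tempfile

    def hack_rename(old_glob_target, new_name):
        matches = glob.glob(old_glob_target)
        assert len(matches) == 1
        os.rename(matches[0], new_name)

    with tempfile.TemporaryDirectory() as results_dir:
        # Stand-in for a BenchBase output file such as "tpcc_<timestamp>.results.csv".
        open(os.path.join(results_dir, "tpcc_2021-06-01_12-00-00.results.csv"), "w").close()
        hack_rename(f"{results_dir}/*.results.csv", f"{results_dir}/oltpbench.res")
        print(os.listdir(results_dir))  # ['oltpbench.res']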