
Commit 951ebe8

ScottLinnn authored and copybara-github committed
Add result parsing for benchbase
PiperOrigin-RevId: 839911398
1 parent: 23d9533 · commit: 951ebe8

4 files changed (+114, -9 lines changed)


perfkitbenchmarker/linux_benchmarks/benchbase_benchmark.py

Lines changed: 2 additions & 6 deletions
@@ -80,8 +80,6 @@ def GetConfig(user_config: Dict[str, Any]) -> Dict[str, Any]:
   return config


-# TODO(shuninglin): need to implement auth logic(automatic password gen)
-# for DSQL
 def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None:
   """Prepares the benchmark by installing BenchBase and loading data.

@@ -147,10 +145,8 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> List[sample.Sample]:
       ' --execute=true"'
   )
   client_vm.RemoteCommand(run_command)
-  # TODO(shuninglin): Parse results from the output files
-
-  samples: List[sample.Sample] = []
-  return samples
+  metadata = benchmark_spec.relational_db.GetResourceMetadata()
+  return benchbase.ParseResults(client_vm, metadata)


 def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None:
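
Note on this change: Run no longer returns an empty sample list; it forwards the client VM and the relational database's resource metadata to benchbase.ParseResults and returns whatever samples come back. A minimal sketch of the kind of values that flow out of the new path, reusing the (metric, value, unit, metadata) Sample signature from the package change below; the metadata contents here are assumed for illustration, not what GetResourceMetadata() actually returns:

from perfkitbenchmarker import sample

# Illustration only: the numbers mirror the fixture in
# tests/linux_packages/benchbase_test.py, and the metadata dict is a made-up
# stand-in for relational_db.GetResourceMetadata().
metadata = {'database': 'benchbase-tpcc'}
run_samples = [
    sample.Sample('tps', 404.0, 'tps', metadata),
    sample.Sample('tpmc', 10908.0, 'tpm', metadata),
    sample.Sample('average_latency', 732.735, 'ms', metadata),
]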

perfkitbenchmarker/linux_packages/benchbase.py

Lines changed: 63 additions & 1 deletion
@@ -13,15 +13,17 @@
 # limitations under the License.

 """Module containing Benchbase installation and cleanup functions."""
-# TODO(shuninglin): Add result parsing functions

+import json
 import logging
 import os
+from typing import Any

 from absl import flags
 import jinja2
 from perfkitbenchmarker import data as pkb_data
 from perfkitbenchmarker import errors
+from perfkitbenchmarker import sample
 from perfkitbenchmarker import sql_engine_utils
 from perfkitbenchmarker import virtual_machine

@@ -209,6 +211,66 @@ def CreateConfigFile(vm: virtual_machine.BaseVirtualMachine) -> None:
   return


+def ParseResults(
+    vm: virtual_machine.BaseVirtualMachine, metadata: dict[str, Any]
+) -> list[sample.Sample]:
+  """Parses the latest benchbase result file and returns metrics.
+
+  Args:
+    vm: The virtual machine to parse results from.
+    metadata: The metadata to attach to the samples.
+
+  Returns:
+    A list of sample.Sample objects.
+
+  Raises:
+    errors.Benchmarks.RunError: If the result file is not found or cannot be
+      parsed.
+  """
+  stdout, _ = vm.RemoteCommand(
+      f'ls -t {BENCHBASE_DIR}/results/tpcc*summary.json | head -n 1'
+  )
+  result_file = stdout.strip()
+  if not result_file:
+    raise errors.Benchmarks.RunError('Benchbase result file not found.')
+  stdout, _ = vm.RemoteCommand(f'cat {result_file}')
+  try:
+    results = json.loads(stdout)
+  except json.JSONDecodeError as e:
+    raise errors.Benchmarks.RunError(
+        f'Error parsing benchbase result file {result_file}: {e}'
+    ) from e
+  samples: list[sample.Sample] = []
+  latency_metrics = results.get('Latency Distribution', {})
+  if not latency_metrics:
+    raise errors.Benchmarks.RunError(
+        'Latency Distribution not found in benchbase result file.'
+    )
+  for key, value in latency_metrics.items():
+    metric_name = (
+        key.replace('(microseconds)', '').strip().replace(' ', '_').lower()
+    )
+    samples.append(sample.Sample(metric_name, value / 1000, 'ms', metadata))
+
+  if 'Throughput (requests/second)' in results:
+    throughput = results['Throughput (requests/second)']
+    samples.append(
+        sample.Sample(
+            'tps',
+            throughput,
+            'tps',
+            metadata,
+        )
+    )
+    tpmc = throughput * int(_BENCHBASE_TXN_WEIGHTS.value[0]) / 100.0 * 60.0
+    samples.append(sample.Sample('tpmc', tpmc, 'tpm', metadata))
+  else:
+    raise errors.Benchmarks.RunError(
+        'Throughput (requests/second) not found in benchbase result file.'
+    )
+  return samples
+
+
 def OverrideEndpoint(
     vm: virtual_machine.BaseVirtualMachine, endpoint: str
 ) -> None:
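
Two unit details in ParseResults are worth calling out: every 'Latency Distribution' key is reported by BenchBase in microseconds and divided by 1000 into milliseconds, and tpmc is derived from throughput using the first entry of --benchbase_txn_weights, i.e. the NewOrder share of the transaction mix. A small worked sketch, assuming a NewOrder weight of 45 (the value implied by the unit-test fixture further down; the flag's actual default is not shown in this commit):

# Sketch of the conversions ParseResults performs. The 45% NewOrder weight is
# an assumption inferred from the test expectations, not a documented default.
key = '95th Percentile Latency (microseconds)'
metric_name = key.replace('(microseconds)', '').strip().replace(' ', '_').lower()
assert metric_name == '95th_percentile_latency'
latency_ms = 1555074 / 1000  # 1555.074 ms

throughput = 404.0     # 'Throughput (requests/second)' from the summary file
new_order_weight = 45  # assumed first entry of --benchbase_txn_weights
tpmc = throughput * new_order_weight / 100.0 * 60.0  # 10908.0 NewOrder txns/minute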

tests/linux_benchmarks/benchbase_benchmark_test.py

Lines changed: 7 additions & 2 deletions
@@ -37,6 +37,8 @@ def setUp(self):
         benchmark_spec.BenchmarkSpec, instance=True
     )
     self.mock_benchmark_spec.vms = [self.mock_vm]
+    self.mock_benchmark_spec.relational_db = mock.Mock()
+    self.mock_benchmark_spec.relational_db.GetResourceMetadata.return_value = {}
     self.mock_load_config = self.enter_context(
         mock.patch.object(configs, 'LoadConfig', autospec=True)
     )
@@ -58,10 +60,13 @@ def test_prepare(self, mock_create_config):
     self.mock_vm.Install.assert_called_once_with('benchbase')
     mock_create_config.assert_called_once_with(self.mock_vm)

-  def test_run(self):
-    # TODO(shuninglin): Update test when Run is implemented
+  @mock.patch.object(benchbase, 'ParseResults', autospec=True)
+  def test_run(self, mock_parse_results):
+    mock_parse_results.return_value = []
     results = benchbase_benchmark.Run(self.mock_benchmark_spec)
     self.assertEqual(results, [])
+    self.mock_benchmark_spec.relational_db.GetResourceMetadata.assert_called_once()
+    mock_parse_results.assert_called_once_with(self.mock_vm, {})


 if __name__ == '__main__':

tests/linux_packages/benchbase_test.py

Lines changed: 42 additions & 0 deletions
@@ -1,5 +1,6 @@
 """Tests for benchbase package."""

+import json
 import unittest
 from unittest import mock

@@ -110,6 +111,47 @@ def test_create_config_file_aurora_dsql(self, _):
         'jdbc:postgresql://localhost:5432/postgres', context['jdbc_url']
     )

+  def test_parse_results(self):
+    self.vm.RemoteCommand.side_effect = [
+        ('tpcc_2025-12-03_19-45-37.summary.json', ''),
+        (
+            json.dumps({
+                'Latency Distribution': {
+                    '95th Percentile Latency (microseconds)': 1555074,
+                    'Maximum Latency (microseconds)': 5588384,
+                    'Median Latency (microseconds)': 656210,
+                    'Minimum Latency (microseconds)': 7632,
+                    '25th Percentile Latency (microseconds)': 270068,
+                    '90th Percentile Latency (microseconds)': 1442239,
+                    '99th Percentile Latency (microseconds)': 2621825,
+                    '75th Percentile Latency (microseconds)': 1121111,
+                    'Average Latency (microseconds)': 732735,
+                },
+                'Throughput (requests/second)': 404.000,
+            }),
+            '',
+        ),
+    ]
+    results = benchbase.ParseResults(self.vm, {})
+    self.assertLen(results, 11)
+    actual_metrics = {s.metric: s.value for s in results}
+    expected_metrics = {
+        '95th_percentile_latency': 1555.074,
+        'maximum_latency': 5588.384,
+        'median_latency': 656.210,
+        'minimum_latency': 7.632,
+        '25th_percentile_latency': 270.068,
+        '90th_percentile_latency': 1442.239,
+        '99th_percentile_latency': 2621.825,
+        '75th_percentile_latency': 1121.111,
+        'average_latency': 732.735,
+        'tps': 404.000,
+        'tpmc': 10908.0,
+    }
+    for metric, value in expected_metrics.items():
+      self.assertIn(metric, actual_metrics)
+      self.assertAlmostEqual(actual_metrics[metric], value, places=3)
+

 if __name__ == '__main__':
   unittest.main()
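
The new test only stubs vm.RemoteCommand, so it can be exercised locally without a VM. One possible way to run just this module with the standard-library runner (a sketch; it assumes the repository root is on sys.path and the test's absl/mock dependencies are installed):

import unittest

# Load and run tests/linux_packages/benchbase_test.py by module name.
suite = unittest.defaultTestLoader.loadTestsFromName(
    'tests.linux_packages.benchbase_test'
)
unittest.TextTestRunner(verbosity=2).run(suite)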
