Improved metadata retrieval interface.

jnguertin · copybara-github · commit f32f45b85647 · 2025-10-24T14:22:06.000-07:00
PiperOrigin-RevId: 823664128
diff --git a/perfkitbenchmarker/edw_service.py b/perfkitbenchmarker/edw_service.py
@@ -20,6 +20,7 @@
 from typing import Any, Dict, List
 
 from absl import flags
+from absl import logging
 from perfkitbenchmarker import resource
 
 flags.DEFINE_integer(
@@ -598,11 +599,39 @@ def GetIterationAuxiliaryMetrics(self, iter_run_key: str) -> Dict[str, Any]:
 
     Returns:
       A dictionary of the following format:
-        { 'metric_1': { 'value': 1, 'unit': 'imperial femtoseconds' },
-          'metric_2': { 'value': 2, 'unit': 'metric dollars' }
+        { 'metric_1': { 'value': 1, 'unit': 'imperial femtoseconds'},
+          'metric_2': { 'value': 2, 'unit': 'metric dollars'}
         ...}
     """
-    raise NotImplementedError
+    logging.info(
+        'Per-iteration auxiliary metrics are not supported for this service.'
+    )
+    del iter_run_key
+    return {}
+
+  def GetTimeBoundAuxiliaryMetrics(
+      self, start_timestamp: float, end_timestamp: float
+  ) -> List[Dict[str, Any]]:
+    """Returns service-specific metrics from a set time range.
+
+    Whenever possible, the service should return metrics only from the compute
+    cluster used for the current benchmark run.
+
+    Args:
+      start_timestamp: Start of the time range to retrieve metrics for.
+      end_timestamp: End of the time range to retrieve metrics for.
+
+    Returns:
+      A list of the following format:
+        [{'metric': 'metric_1', 'value': 1, 'unit': 'imperial nanoseconds', 'metadata': {}},
+         {'metric': 'metric_2', 'value': 2, 'unit': 'metric dollars', 'metadata': {}}
+        ...]
+    """
+    logging.info(
+        'Time-bound auxiliary metrics are not supported for this service.'
+    )
+    del start_timestamp, end_timestamp
+    return []
 
   def CreateSearchIndex(
       self, table_path: str, index_name: str
@@ -758,3 +787,18 @@ def TextSearchQuery(
       A tuple of execution time in seconds and a dictionary of metadata.
     """
     raise NotImplementedError
+
+  @staticmethod
+  def ColsToRows(col_res: dict[str, list[Any]]) -> list[dict[str, Any]]:
+    """Converts a dictionary of columns to a list of rows.
+
+    Args:
+      col_res: A dictionary of columns to convert to a list of rows.
+
+    Returns:
+      A list of dictionaries, one per row, truncated to the shortest column.
+
+      e.g. {'col1': [1, 2, 3], 'col2': [4, 5, 6]} -> [{'col1': 1, 'col2': 4},
+        {'col1': 2, 'col2': 5}, {'col1': 3, 'col2': 6}].
+    """
+    return [dict(zip(col_res.keys(), row)) for row in zip(*col_res.values())]
diff --git a/perfkitbenchmarker/providers/gcp/bigquery.py b/perfkitbenchmarker/providers/gcp/bigquery.py
@@ -20,7 +20,7 @@
 import os
 import random
 import re
-from typing import Any
+from typing import Any, override
 
 from absl import flags
 from perfkitbenchmarker import data
@@ -769,6 +769,7 @@ def GetAutoscaleSlotSeconds(self, run_iter_id: str) -> int:
     run_cost = output['details']['query_results']['billed_slot_seconds'][0]
     return run_cost
 
+  @override
   def GetIterationAuxiliaryMetrics(self, iter_run_key: str) -> dict[str, Any]:
     service_auxiliary_metrics = {}
     try:
diff --git a/tests/edw_service_test.py b/tests/edw_service_test.py
@@ -16,6 +16,7 @@
 import copy
 import unittest
 from absl import flags
+from absl.testing import parameterized
 from perfkitbenchmarker import edw_service
 from perfkitbenchmarker.configs import benchmark_config_spec
 from tests import pkb_common_test_case
@@ -110,6 +111,44 @@ def testPkbManagedGetClusterIdentifier(self):
     )
     self.assertEqual('pkb-' + FLAGS.run_uri, edw_local.cluster_identifier)
 
+  @parameterized.named_parameters(
+      dict(
+          testcase_name='_empty_dict',
+          cols={},
+          expected=[],
+      ),
+      dict(
+          testcase_name='_single_column',
+          cols={'col1': ['val1', 'val2']},
+          expected=[{'col1': 'val1'}, {'col1': 'val2'}],
+      ),
+      dict(
+          testcase_name='_multiple_columns',
+          cols={'col1': ['val1', 'val2'], 'col2': ['val3', 'val4']},
+          expected=[
+              {'col1': 'val1', 'col2': 'val3'},
+              {'col1': 'val2', 'col2': 'val4'},
+          ],
+      ),
+      dict(
+          testcase_name='_uneven_columns',
+          cols={'col1': ['val1', 'val2'], 'col2': ['val3', 'val4', 'val5']},
+          expected=[
+              {'col1': 'val1', 'col2': 'val3'},
+              {'col1': 'val2', 'col2': 'val4'},
+          ],
+      ),
+  )
+  def testColsToRows(self, cols=None, expected=None):
+    self.assertEqual(
+        expected,
+        edw_service.EdwService.ColsToRows(cols),
+        msg=(
+            f'Expected {expected} but got'
+            f' {edw_service.EdwService.ColsToRows(cols)}'
+        ),
+    )
+
 
 if __name__ == '__main__':
   unittest.main()