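Default capture_live_migration_timestamps to true.

Also log a warning when the flag is explicitly set to False, aggregate
time-series metrics once per disruption event (using that event's end_time)
instead of once against the latest end time, and remove GetDisruptionEnds.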

raymond13513 · copybara-github · commit 65f9e5e0c5dd · 2025-12-17T15:35:55.000-08:00
PiperOrigin-RevId: 845944984
diff --git a/perfkitbenchmarker/time_triggers/base_disruption_trigger.py b/perfkitbenchmarker/time_triggers/base_disruption_trigger.py
@@ -33,6 +33,7 @@
     sample.QPS_TIME_SERIES,
 ]
 PERCENTILES = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+MS_MULTIPLIER = 1000
 
 DEGRADATION_PERCENT = flags.DEFINE_float(
     'maintenance_degradation_percent',
@@ -97,54 +98,55 @@ def GetMetadataForTrigger(self, event: DisruptionEvent) -> Dict[str, Any]:
         'Host_maintenance_end': event.end_time,
     }
 
+  def _GenerateDisruptionTotalTimeSamples(
+      self, samples: MutableSequence[sample.Sample]
+  ) -> MutableSequence[sample.Sample]:
+    """Generate samples for total disruption time."""
+    # Populate run_number on the "LM Total Time" samples by copying the metadata
+    # from (one of) the existing samples. Ideally pkb.DoRunPhase() would have
+    # sole responsibility for populating run_number for all samples, but making
+    # that change might be risky.
+    sample_metadata = (
+        copy.deepcopy(samples[0].metadata) if len(samples) > 0 else {}
+    )
+
+    return [
+        sample.Sample(
+            'LM Total Time',
+            d.total_time,
+            'seconds',
+            sample_metadata | self.GetMetadataForTrigger(d),
+        )
+        for d in self.disruption_events
+    ]
+
   def AppendSamples(
       self,
       unused_sender,
       benchmark_spec: bm_spec.BenchmarkSpec,
       samples: MutableSequence[sample.Sample],
   ):
     """Append samples related to disruption."""
+    samples += self._GenerateDisruptionTotalTimeSamples(samples)
+    samples += self._AppendAggregatedMetrics(samples)
 
-    def generate_disruption_total_time_samples() -> (
-        MutableSequence[sample.Sample]
-    ):
-      # Host maintenance is in s
-      self.disruption_ends = max(
-          [float(d.end_time) * 1000 for d in self.disruption_events],
-          default=0,
-      )
-
-      # Populate the run_number "LM Total Time" by copying the metadata from
-      # (one of) the existing samples. Ideally pkb.DoRunPhase() would have sole
-      # responsibility for populating run_number for all samples, but making
-      # that change might be risky.
-      sample_metadata = (
-          copy.deepcopy(samples[0].metadata) if len(samples) > 0 else {}
-      )
-
-      return [
-          sample.Sample(
-              'LM Total Time',
-              d.total_time,
-              'seconds',
-              sample_metadata | self.GetMetadataForTrigger(d),
-          )
-          for d in self.disruption_events
-      ]
-
-    samples += generate_disruption_total_time_samples()
-    self._AppendAggregatedMetrics(samples)
-
-  def _AppendAggregatedMetrics(self, samples: MutableSequence[sample.Sample]):
+  def _AppendAggregatedMetrics(
+      self, samples: MutableSequence[sample.Sample]
+  ) -> MutableSequence[sample.Sample]:
     """Finds the time series samples and add generate the aggregated metrics."""
     additional_samples = []
     for s in samples:
       if s.metric in TIME_SERIES_SAMPLES_FOR_AGGREGATION:
-        additional_samples += self._AggregateThroughputSample(s)
-    samples += additional_samples
+        for disruption_event in self.disruption_events:
+          additional_samples += self._AggregateThroughputSample(
+              s, disruption_event
+          )
+    return additional_samples
 
   def _AggregateThroughputSample(
-      self, s: sample.Sample
+      self,
+      s: sample.Sample,
+      disruption_event: DisruptionEvent,
   ) -> MutableSequence[sample.Sample]:
     """Aggregate a time series sample into disruption metrics.
 
@@ -153,6 +155,7 @@ def _AggregateThroughputSample(
 
     Args:
       s: A time series sample create using CreateTimeSeriesSample in samples.py
+      disruption_event: Event whose end_time (s) marks the disruption end.
 
     Returns:
       A list of samples.
@@ -166,7 +169,7 @@ def _AggregateThroughputSample(
     # provide it in the metadata.
     ramp_up_ends = time_series[0]
     ramp_down_starts = time_series[-1]
-    disruption_ends = self.GetDisruptionEnds()
+    disruption_ends = disruption_event.end_time * MS_MULTIPLIER
     if disruption_ends is None:
       disruption_ends = time_series[-1]
     if sample.RAMP_DOWN_STARTS in metadata:
@@ -189,7 +192,7 @@ def _AggregateThroughputSample(
         # If more than 1 sequential value is missing from the time series.
         # Distrubute the ops throughout the time series
         if i > 0:
-          time_gap_in_seconds = (time - time_series[i - 1]) / 1000
+          time_gap_in_seconds = (time - time_series[i - 1]) / MS_MULTIPLIER
           missing_entry_count = int((time_gap_in_seconds / interval) - 1)
           if missing_entry_count > 1:
             total_missing_seconds += missing_entry_count * interval
@@ -258,15 +261,6 @@ def _AggregateThroughputSample(
     )
     return samples
 
-  def GetDisruptionEnds(self) -> float | None:
-    """Get the disruption ends."""
-    if self.disruption_events:
-      return max(
-          [float(d.end_time) * 1000 for d in self.disruption_events],
-          default=None,
-      )
-    return None
-
   def _ComputeLossPercentile(
       self,
       mean: float,
diff --git a/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py b/perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py
@@ -64,7 +64,7 @@
 
 CAPTURE_LIVE_MIGRATION_TIMESTAMPS = flags.DEFINE_boolean(
     'capture_live_migration_timestamps',
-    False,
+    True,
     (
         'Whether to capture maintenance times during migration. '
         'This requires external python script for notification.'
@@ -338,6 +338,11 @@ class MaintenanceEventTrigger(base_disruption_trigger.BaseDisruptionTrigger):
 
   def __init__(self):
     super().__init__(SIMULATE_MAINTENANCE_DELAY.value)
+    if not CAPTURE_LIVE_MIGRATION_TIMESTAMPS.value:
+      logging.warning(
+          'capture_live_migration_timestamps is set to False. This will not'
+          ' be supported in the future and might break the test.'
+      )
     self.capture_live_migration_timestamps = (
         CAPTURE_LIVE_MIGRATION_TIMESTAMPS.value
     )
diff --git a/tests/time_triggers/base_disruption_trigger_test.py b/tests/time_triggers/base_disruption_trigger_test.py
@@ -80,12 +80,11 @@ def testAppendLossFunctionWithDegradationPercent(self):
     FLAGS.maintenance_degradation_percent = 90
     vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec)
     self.trigger.trigger_time = datetime.datetime.fromtimestamp(2)
-    self.trigger.disruption_events = []
-    self.enter_context(
-        mock.patch.object(
-            self.trigger, 'GetDisruptionEnds', return_value=None, autospec=True
+    self.trigger.disruption_events = [
+        base_disruption_trigger.DisruptionEvent(
+            start_time=2, end_time=8, total_time=100
         )
-    )
+    ]
     s = [
         sample.CreateTimeSeriesSample(
             [100, 100, 100, 90, 90, 90],
@@ -111,6 +110,20 @@ def testAppendLossFunctionWithDegradationPercent(self):
                 },
                 timestamp=0,
             ),
+            sample.Sample(
+                metric='LM Total Time',
+                value=100.0,
+                unit='seconds',
+                metadata={
+                    'values': [100, 100, 100, 90, 90, 90],
+                    'timestamps': [1000, 2000, 3000, 4000, 5000, 6000],
+                    'interval': 1,
+                    'LM_total_time': 100,
+                    'Host_maintenance_start': 2,
+                    'Host_maintenance_end': 8,
+                },
+                timestamp=0,
+            ),
             sample.Sample(
                 metric='seconds_dropped_below_0_percent',
                 value=0.0,
@@ -197,7 +210,7 @@ def testAppendLossFunctionWithDegradationPercent(self):
             ),
             sample.Sample(
                 metric='total_missing_seconds',
-                value=0,
+                value=0.0,
                 unit='s',
                 metadata={},
                 timestamp=0,
@@ -209,12 +222,11 @@ def testAppendLossFunctionWithDegradationPercent(self):
   def testAppendLossFunctionWithMissingTimeStampsWithRegression(self):
     vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec)
     self.trigger.trigger_time = datetime.datetime.fromtimestamp(2)
-    self.trigger.disruption_events = []
-    self.enter_context(
-        mock.patch.object(
-            self.trigger, 'GetDisruptionEnds', return_value=None, autospec=True
+    self.trigger.disruption_events = [
+        base_disruption_trigger.DisruptionEvent(
+            start_time=2, end_time=8, total_time=100
         )
-    )
+    ]
     s = [
         sample.CreateTimeSeriesSample(
             [100, 100, 20, 100],
@@ -240,6 +252,20 @@ def testAppendLossFunctionWithMissingTimeStampsWithRegression(self):
                 },
                 timestamp=0,
             ),
+            sample.Sample(
+                metric='LM Total Time',
+                value=100.0,
+                unit='seconds',
+                metadata={
+                    'values': [100, 100, 20, 100],
+                    'timestamps': [1000, 2000, 6000, 7000],
+                    'interval': 1,
+                    'LM_total_time': 100,
+                    'Host_maintenance_start': 2,
+                    'Host_maintenance_end': 8,
+                },
+                timestamp=0,
+            ),
             sample.Sample(
                 metric='seconds_dropped_below_0_percent',
                 value=0.0,
@@ -337,12 +363,11 @@ def testAppendLossFunctionWithMissingTimeStampsWithRegression(self):
   @mock.patch.object(time, 'time', mock.MagicMock(return_value=0))
   def testAppendLossFunctionWithMissingTimeStampsNoRegression(self):
     self.trigger.trigger_time = datetime.datetime.fromtimestamp(2)
-    self.trigger.disruption_events = []
-    self.enter_context(
-        mock.patch.object(
-            self.trigger, 'GetDisruptionEnds', return_value=None, autospec=True
+    self.trigger.disruption_events = [
+        base_disruption_trigger.DisruptionEvent(
+            start_time=2, end_time=8, total_time=100
         )
-    )
+    ]
     vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec)
     s = [
         sample.CreateTimeSeriesSample(
@@ -369,6 +394,20 @@ def testAppendLossFunctionWithMissingTimeStampsNoRegression(self):
                 },
                 timestamp=0,
             ),
+            sample.Sample(
+                metric='LM Total Time',
+                value=100.0,
+                unit='seconds',
+                metadata={
+                    'values': [1, 1, 4, 1],
+                    'timestamps': [1000, 2000, 6000, 7000],
+                    'interval': 1,
+                    'LM_total_time': 100,
+                    'Host_maintenance_start': 2,
+                    'Host_maintenance_end': 8,
+                },
+                timestamp=0,
+            ),
             sample.Sample(
                 metric='seconds_dropped_below_0_percent',
                 value=0.0,
@@ -455,7 +494,7 @@ def testAppendLossFunctionWithMissingTimeStampsNoRegression(self):
             ),
             sample.Sample(
                 metric='total_missing_seconds',
-                value=3,
+                value=3.0,
                 unit='s',
                 metadata={},
                 timestamp=0,
@@ -466,12 +505,11 @@ def testAppendLossFunctionWithMissingTimeStampsNoRegression(self):
   @mock.patch.object(time, 'time', mock.MagicMock(return_value=0))
   def testAppendLossFunctionSamples(self):
     vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec)
-    self.trigger.disruption_events = []
-    self.enter_context(
-        mock.patch.object(
-            self.trigger, 'GetDisruptionEnds', return_value=None, autospec=True
+    self.trigger.disruption_events = [
+        base_disruption_trigger.DisruptionEvent(
+            start_time=5, end_time=8, total_time=100
         )
-    )
+    ]
     self.trigger.vms = [mock.MagicMock()]
     s = sample.CreateTimeSeriesSample(
         [1, 1, 1, 1, 0, 0.1, 0.2, 0.3],
@@ -481,6 +519,7 @@ def testAppendLossFunctionSamples(self):
         1,
     )
     samples = [s]
+
     self.trigger.trigger_time = datetime.datetime.fromtimestamp(4)
     self.trigger.AppendSamples(None, vm_spec, samples)
     self.assertEqual(
@@ -506,6 +545,29 @@ def testAppendLossFunctionSamples(self):
                 },
                 timestamp=0,
             ),
+            sample.Sample(
+                metric='LM Total Time',
+                value=100.0,
+                unit='seconds',
+                metadata={
+                    'values': [1, 1, 1, 1, 0, 0.1, 0.2, 0.3],
+                    'timestamps': [
+                        1000,
+                        2000,
+                        3000,
+                        4000,
+                        5000,
+                        6000,
+                        7000,
+                        8000,
+                    ],
+                    'interval': 1,
+                    'LM_total_time': 100,
+                    'Host_maintenance_start': 5,
+                    'Host_maintenance_end': 8,
+                },
+                timestamp=0,
+            ),
             sample.Sample(
                 metric='seconds_dropped_below_0_percent',
                 value=1.0,
@@ -592,7 +654,7 @@ def testAppendLossFunctionSamples(self):
             ),
             sample.Sample(
                 metric='total_missing_seconds',
-                value=0,
+                value=0.0,
                 unit='s',
                 metadata={},
                 timestamp=0,