Skip to content

Commit 65f9e5e

Browse files
raymond13513copybara-github
authored and committed
Default capture_live_migration_timestamps to true.
PiperOrigin-RevId: 845944984
1 parent 9929f32 commit 65f9e5e

File tree

3 files changed

+130
-69
lines changed

3 files changed

+130
-69
lines changed

perfkitbenchmarker/time_triggers/base_disruption_trigger.py

Lines changed: 39 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
sample.QPS_TIME_SERIES,
3434
]
3535
PERCENTILES = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
36+
MS_MULTIPLIER = 1000
3637

3738
DEGRADATION_PERCENT = flags.DEFINE_float(
3839
'maintenance_degradation_percent',
@@ -97,54 +98,55 @@ def GetMetadataForTrigger(self, event: DisruptionEvent) -> Dict[str, Any]:
9798
'Host_maintenance_end': event.end_time,
9899
}
99100

101+
def _GenerateDisruptionTotalTimeSamples(
102+
self, samples: MutableSequence[sample.Sample]
103+
) -> MutableSequence[sample.Sample]:
104+
"""Generate samples for total disruption time."""
105+
# Populate the run_number "LM Total Time" by copying the metadata from
106+
# (one of) the existing samples. Ideally pkb.DoRunPhase() would have sole
107+
# responsibility for populating run_number for all samples, but making
108+
# that change might be risky.
109+
sample_metadata = (
110+
copy.deepcopy(samples[0].metadata) if len(samples) > 0 else {}
111+
)
112+
113+
return [
114+
sample.Sample(
115+
'LM Total Time',
116+
d.total_time,
117+
'seconds',
118+
sample_metadata | self.GetMetadataForTrigger(d),
119+
)
120+
for d in self.disruption_events
121+
]
122+
100123
def AppendSamples(
101124
self,
102125
unused_sender,
103126
benchmark_spec: bm_spec.BenchmarkSpec,
104127
samples: MutableSequence[sample.Sample],
105128
):
106129
"""Append samples related to disruption."""
130+
samples += self._GenerateDisruptionTotalTimeSamples(samples)
131+
samples += self._AppendAggregatedMetrics(samples)
107132

108-
def generate_disruption_total_time_samples() -> (
109-
MutableSequence[sample.Sample]
110-
):
111-
# Host maintenance is in s
112-
self.disruption_ends = max(
113-
[float(d.end_time) * 1000 for d in self.disruption_events],
114-
default=0,
115-
)
116-
117-
# Populate the run_number "LM Total Time" by copying the metadata from
118-
# (one of) the existing samples. Ideally pkb.DoRunPhase() would have sole
119-
# responsibility for populating run_number for all samples, but making
120-
# that change might be risky.
121-
sample_metadata = (
122-
copy.deepcopy(samples[0].metadata) if len(samples) > 0 else {}
123-
)
124-
125-
return [
126-
sample.Sample(
127-
'LM Total Time',
128-
d.total_time,
129-
'seconds',
130-
sample_metadata | self.GetMetadataForTrigger(d),
131-
)
132-
for d in self.disruption_events
133-
]
134-
135-
samples += generate_disruption_total_time_samples()
136-
self._AppendAggregatedMetrics(samples)
137-
138-
def _AppendAggregatedMetrics(self, samples: MutableSequence[sample.Sample]):
133+
def _AppendAggregatedMetrics(
134+
self, samples: MutableSequence[sample.Sample]
135+
) -> MutableSequence[sample.Sample]:
139136
"""Finds the time series samples and generates the aggregated metrics."""
140137
additional_samples = []
141138
for s in samples:
142139
if s.metric in TIME_SERIES_SAMPLES_FOR_AGGREGATION:
143-
additional_samples += self._AggregateThroughputSample(s)
144-
samples += additional_samples
140+
for disruption_event in self.disruption_events:
141+
additional_samples += self._AggregateThroughputSample(
142+
s, disruption_event
143+
)
144+
return additional_samples
145145

146146
def _AggregateThroughputSample(
147-
self, s: sample.Sample
147+
self,
148+
s: sample.Sample,
149+
disruption_event: DisruptionEvent,
148150
) -> MutableSequence[sample.Sample]:
149151
"""Aggregate a time series sample into disruption metrics.
150152
@@ -153,6 +155,7 @@ def _AggregateThroughputSample(
153155
154156
Args:
155157
s: A time series sample created using CreateTimeSeriesSample in samples.py
158+
disruption_event: The DisruptionEvent being aggregated.
156159
157160
Returns:
158161
A list of samples.
@@ -166,7 +169,7 @@ def _AggregateThroughputSample(
166169
# provide it in the metadata.
167170
ramp_up_ends = time_series[0]
168171
ramp_down_starts = time_series[-1]
169-
disruption_ends = self.GetDisruptionEnds()
172+
disruption_ends = disruption_event.end_time * MS_MULTIPLIER
170173
if disruption_ends is None:
171174
disruption_ends = time_series[-1]
172175
if sample.RAMP_DOWN_STARTS in metadata:
@@ -189,7 +192,7 @@ def _AggregateThroughputSample(
189192
# If more than 1 sequential value is missing from the time series.
190193
# Distribute the ops throughout the time series
191194
if i > 0:
192-
time_gap_in_seconds = (time - time_series[i - 1]) / 1000
195+
time_gap_in_seconds = (time - time_series[i - 1]) / MS_MULTIPLIER
193196
missing_entry_count = int((time_gap_in_seconds / interval) - 1)
194197
if missing_entry_count > 1:
195198
total_missing_seconds += missing_entry_count * interval
@@ -258,15 +261,6 @@ def _AggregateThroughputSample(
258261
)
259262
return samples
260263

261-
def GetDisruptionEnds(self) -> float | None:
262-
"""Get the disruption ends."""
263-
if self.disruption_events:
264-
return max(
265-
[float(d.end_time) * 1000 for d in self.disruption_events],
266-
default=None,
267-
)
268-
return None
269-
270264
def _ComputeLossPercentile(
271265
self,
272266
mean: float,

perfkitbenchmarker/time_triggers/maintenance_simulation_trigger.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464

6565
CAPTURE_LIVE_MIGRATION_TIMESTAMPS = flags.DEFINE_boolean(
6666
'capture_live_migration_timestamps',
67-
False,
67+
True,
6868
(
6969
'Whether to capture maintenance times during migration. '
7070
'This requires external python script for notification.'
@@ -338,6 +338,11 @@ class MaintenanceEventTrigger(base_disruption_trigger.BaseDisruptionTrigger):
338338

339339
def __init__(self):
340340
super().__init__(SIMULATE_MAINTENANCE_DELAY.value)
341+
if not CAPTURE_LIVE_MIGRATION_TIMESTAMPS.value:
342+
logging.warning(
343+
'capture_live_migration_timestamps is set to False. This will not'
344+
' be supported in the future and might break the test.'
345+
)
341346
self.capture_live_migration_timestamps = (
342347
CAPTURE_LIVE_MIGRATION_TIMESTAMPS.value
343348
)

tests/time_triggers/base_disruption_trigger_test.py

Lines changed: 85 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,11 @@ def testAppendLossFunctionWithDegradationPercent(self):
8080
FLAGS.maintenance_degradation_percent = 90
8181
vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec)
8282
self.trigger.trigger_time = datetime.datetime.fromtimestamp(2)
83-
self.trigger.disruption_events = []
84-
self.enter_context(
85-
mock.patch.object(
86-
self.trigger, 'GetDisruptionEnds', return_value=None, autospec=True
83+
self.trigger.disruption_events = [
84+
base_disruption_trigger.DisruptionEvent(
85+
start_time=2, end_time=8, total_time=100
8786
)
88-
)
87+
]
8988
s = [
9089
sample.CreateTimeSeriesSample(
9190
[100, 100, 100, 90, 90, 90],
@@ -111,6 +110,20 @@ def testAppendLossFunctionWithDegradationPercent(self):
111110
},
112111
timestamp=0,
113112
),
113+
sample.Sample(
114+
metric='LM Total Time',
115+
value=100.0,
116+
unit='seconds',
117+
metadata={
118+
'values': [100, 100, 100, 90, 90, 90],
119+
'timestamps': [1000, 2000, 3000, 4000, 5000, 6000],
120+
'interval': 1,
121+
'LM_total_time': 100,
122+
'Host_maintenance_start': 2,
123+
'Host_maintenance_end': 8,
124+
},
125+
timestamp=0,
126+
),
114127
sample.Sample(
115128
metric='seconds_dropped_below_0_percent',
116129
value=0.0,
@@ -197,7 +210,7 @@ def testAppendLossFunctionWithDegradationPercent(self):
197210
),
198211
sample.Sample(
199212
metric='total_missing_seconds',
200-
value=0,
213+
value=0.0,
201214
unit='s',
202215
metadata={},
203216
timestamp=0,
@@ -209,12 +222,11 @@ def testAppendLossFunctionWithDegradationPercent(self):
209222
def testAppendLossFunctionWithMissingTimeStampsWithRegression(self):
210223
vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec)
211224
self.trigger.trigger_time = datetime.datetime.fromtimestamp(2)
212-
self.trigger.disruption_events = []
213-
self.enter_context(
214-
mock.patch.object(
215-
self.trigger, 'GetDisruptionEnds', return_value=None, autospec=True
225+
self.trigger.disruption_events = [
226+
base_disruption_trigger.DisruptionEvent(
227+
start_time=2, end_time=8, total_time=100
216228
)
217-
)
229+
]
218230
s = [
219231
sample.CreateTimeSeriesSample(
220232
[100, 100, 20, 100],
@@ -240,6 +252,20 @@ def testAppendLossFunctionWithMissingTimeStampsWithRegression(self):
240252
},
241253
timestamp=0,
242254
),
255+
sample.Sample(
256+
metric='LM Total Time',
257+
value=100.0,
258+
unit='seconds',
259+
metadata={
260+
'values': [100, 100, 20, 100],
261+
'timestamps': [1000, 2000, 6000, 7000],
262+
'interval': 1,
263+
'LM_total_time': 100,
264+
'Host_maintenance_start': 2,
265+
'Host_maintenance_end': 8,
266+
},
267+
timestamp=0,
268+
),
243269
sample.Sample(
244270
metric='seconds_dropped_below_0_percent',
245271
value=0.0,
@@ -337,12 +363,11 @@ def testAppendLossFunctionWithMissingTimeStampsWithRegression(self):
337363
@mock.patch.object(time, 'time', mock.MagicMock(return_value=0))
338364
def testAppendLossFunctionWithMissingTimeStampsNoRegression(self):
339365
self.trigger.trigger_time = datetime.datetime.fromtimestamp(2)
340-
self.trigger.disruption_events = []
341-
self.enter_context(
342-
mock.patch.object(
343-
self.trigger, 'GetDisruptionEnds', return_value=None, autospec=True
366+
self.trigger.disruption_events = [
367+
base_disruption_trigger.DisruptionEvent(
368+
start_time=2, end_time=8, total_time=100
344369
)
345-
)
370+
]
346371
vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec)
347372
s = [
348373
sample.CreateTimeSeriesSample(
@@ -369,6 +394,20 @@ def testAppendLossFunctionWithMissingTimeStampsNoRegression(self):
369394
},
370395
timestamp=0,
371396
),
397+
sample.Sample(
398+
metric='LM Total Time',
399+
value=100.0,
400+
unit='seconds',
401+
metadata={
402+
'values': [1, 1, 4, 1],
403+
'timestamps': [1000, 2000, 6000, 7000],
404+
'interval': 1,
405+
'LM_total_time': 100,
406+
'Host_maintenance_start': 2,
407+
'Host_maintenance_end': 8,
408+
},
409+
timestamp=0,
410+
),
372411
sample.Sample(
373412
metric='seconds_dropped_below_0_percent',
374413
value=0.0,
@@ -455,7 +494,7 @@ def testAppendLossFunctionWithMissingTimeStampsNoRegression(self):
455494
),
456495
sample.Sample(
457496
metric='total_missing_seconds',
458-
value=3,
497+
value=3.0,
459498
unit='s',
460499
metadata={},
461500
timestamp=0,
@@ -466,12 +505,11 @@ def testAppendLossFunctionWithMissingTimeStampsNoRegression(self):
466505
@mock.patch.object(time, 'time', mock.MagicMock(return_value=0))
467506
def testAppendLossFunctionSamples(self):
468507
vm_spec = mock.MagicMock(spec=benchmark_spec.BenchmarkSpec)
469-
self.trigger.disruption_events = []
470-
self.enter_context(
471-
mock.patch.object(
472-
self.trigger, 'GetDisruptionEnds', return_value=None, autospec=True
508+
self.trigger.disruption_events = [
509+
base_disruption_trigger.DisruptionEvent(
510+
start_time=5, end_time=8, total_time=100
473511
)
474-
)
512+
]
475513
self.trigger.vms = [mock.MagicMock()]
476514
s = sample.CreateTimeSeriesSample(
477515
[1, 1, 1, 1, 0, 0.1, 0.2, 0.3],
@@ -481,6 +519,7 @@ def testAppendLossFunctionSamples(self):
481519
1,
482520
)
483521
samples = [s]
522+
484523
self.trigger.trigger_time = datetime.datetime.fromtimestamp(4)
485524
self.trigger.AppendSamples(None, vm_spec, samples)
486525
self.assertEqual(
@@ -506,6 +545,29 @@ def testAppendLossFunctionSamples(self):
506545
},
507546
timestamp=0,
508547
),
548+
sample.Sample(
549+
metric='LM Total Time',
550+
value=100.0,
551+
unit='seconds',
552+
metadata={
553+
'values': [1, 1, 1, 1, 0, 0.1, 0.2, 0.3],
554+
'timestamps': [
555+
1000,
556+
2000,
557+
3000,
558+
4000,
559+
5000,
560+
6000,
561+
7000,
562+
8000,
563+
],
564+
'interval': 1,
565+
'LM_total_time': 100,
566+
'Host_maintenance_start': 5,
567+
'Host_maintenance_end': 8,
568+
},
569+
timestamp=0,
570+
),
509571
sample.Sample(
510572
metric='seconds_dropped_below_0_percent',
511573
value=1.0,
@@ -592,7 +654,7 @@ def testAppendLossFunctionSamples(self):
592654
),
593655
sample.Sample(
594656
metric='total_missing_seconds',
595-
value=0,
657+
value=0.0,
596658
unit='s',
597659
metadata={},
598660
timestamp=0,

0 commit comments

Comments
 (0)