Skip to content

Commit d37e32a

Browse files
jacklacey11 authored and copybara-github committed
Reclassify timeouts on startup script retrieval to indicate the potential source issue from the startup script service
PiperOrigin-RevId: 852450925
1 parent 4466a7e commit d37e32a

File tree

5 files changed

+57
-3
lines changed

5 files changed

+57
-3
lines changed

perfkitbenchmarker/benchmark_status.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,9 @@ class FailedSubstatus:
7373
# Failure when config values are invalid.
7474
INVALID_VALUE = 'INVALID_VALUE'
7575

76+
# VM under test is not ready, causing the benchmark to fail
77+
VM_NOT_READY = 'VM_NOT_READY'
78+
7679
# List of valid substatuses for use with --retries.
7780
# UNCATEGORIZED failures are not retryable. To make a specific UNCATEGORIZED
7881
# failure retryable, please raise an errors.Benchmarks.KnownIntermittentError.
@@ -87,6 +90,7 @@ class FailedSubstatus:
8790
UNSUPPORTED,
8891
COMMAND_TIMEOUT,
8992
RETRIES_EXCEEDED,
93+
VM_NOT_READY,
9094
]
9195

9296

perfkitbenchmarker/linux_packages/linux_boot.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
FLAGS = flags.FLAGS
2727

2828

29+
class StartupScriptRetrievalError(vm_util.TimeoutExceededRetryError):
30+
"""Raised when GetStartupScriptOutput times out."""
31+
32+
2933
def PrepareBootScriptVM(aux_vm_ips: str, aux_vm_port: int) -> str:
3034
script_path = data.ResourcePath(
3135
os.path.join(DATA_DIR, BOOT_STARTUP_SCRIPT_TEMPLATE)
@@ -83,7 +87,12 @@ def CollectBootSamples(
8387
Returns:
8488
A list of sample.Sample objects.
8589
"""
86-
boot_output = GetStartupScriptOutput(vm, BOOT_SCRIPT_OUTPUT).split('\n')
90+
try:
91+
boot_output = GetStartupScriptOutput(vm, BOOT_SCRIPT_OUTPUT).split('\n')
92+
except vm_util.TimeoutExceededRetryError as e:
93+
raise StartupScriptRetrievalError(
94+
'Timeout getting startup script output.'
95+
) from e
8796
boot_samples = ScrapeConsoleLogLines(
8897
boot_output, create_time, CONSOLE_FIRST_START_MATCHERS
8998
)
@@ -621,7 +630,12 @@ def CollectVmToVmSamples(
621630
) -> List[sample.Sample]:
622631
"""Collect samples related to vm-to-vm networking."""
623632
samples = []
624-
vm_output = GetStartupScriptOutput(vm, BOOT_SCRIPT_OUTPUT)
633+
try:
634+
vm_output = GetStartupScriptOutput(vm, BOOT_SCRIPT_OUTPUT)
635+
except vm_util.TimeoutExceededRetryError as e:
636+
raise StartupScriptRetrievalError(
637+
'Timeout getting startup script output.'
638+
) from e
625639
vm_internal_ip = vm.internal_ip
626640
vm_external_ip = vm.ip_address
627641

perfkitbenchmarker/pkb.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,12 @@ def RunBenchmark(
12941294
spec.failed_substatus = (
12951295
benchmark_status.FailedSubstatus.PROCESS_KILLED
12961296
)
1297+
elif isinstance(
1298+
e, cluster_boot_benchmark.linux_boot.StartupScriptRetrievalError
1299+
):
1300+
spec.failed_substatus = (
1301+
benchmark_status.FailedSubstatus.VM_NOT_READY
1302+
)
12971303
elif _IsException(e, vm_util.TimeoutExceededRetryError):
12981304
spec.failed_substatus = (
12991305
benchmark_status.FailedSubstatus.COMMAND_TIMEOUT

tests/linux_packages/linux_boot_test.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def testParseSystemDCriticalChain(self):
229229

230230
def testCollectVmToVmSamples(self):
231231
"""Test vm to vm networking result parsing."""
232-
# Load startup script data, which ingress timestamps.
232+
# Load startup script data, which contains ingress timestamps.
233233
with open(os.path.join(self.data_dir, 'boot.output')) as f:
234234
boot_output = f.read()
235235
# Load tcpdump output, which contains egress timestamps.
@@ -267,6 +267,20 @@ def testCollectVmToVmSamples(self):
267267
],
268268
)
269269

270+
def testCollectBootSamplesTimeout(self):
271+
"""Test CollectBootSamples raises StartupScriptRetrievalError on timeout."""
272+
self.enter_context(
273+
mock.patch.object(
274+
linux_boot,
275+
'GetStartupScriptOutput',
276+
side_effect=vm_util.TimeoutExceededRetryError,
277+
)
278+
)
279+
with self.assertRaises(linux_boot.StartupScriptRetrievalError):
280+
linux_boot.CollectBootSamples(
281+
self.mock_vm, 0, ('', ''), datetime.datetime.now()
282+
)
283+
270284

271285
if __name__ == '__main__':
272286
unittest.main()

tests/pkb_test.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from perfkitbenchmarker import sample
3333
from perfkitbenchmarker import stages
3434
from perfkitbenchmarker import test_util
35+
from perfkitbenchmarker.linux_packages import linux_boot
3536
from perfkitbenchmarker.providers.gcp import util as gcp_utils
3637
from tests import pkb_common_test_case
3738

@@ -85,6 +86,21 @@ def testCreateProvisionFailedSample(self):
8586
self.spec, error_msg, stages.PROVISION, self.collector
8687
)
8788

89+
def testCreateScriptRetrievalFailedSample(self):
90+
self.flags_mock.create_failed_run_samples = True
91+
error_msg = 'error'
92+
self.run_mock.side_effect = linux_boot.StartupScriptRetrievalError(
93+
error_msg
94+
)
95+
96+
self.assertRaises(
97+
Exception, pkb.RunBenchmark, self.spec, self.collector, False
98+
)
99+
self.assertEqual(
100+
self.spec.failed_substatus,
101+
benchmark_status.FailedSubstatus.VM_NOT_READY,
102+
)
103+
88104
def testCreatePrepareFailedSample(self):
89105
self.flags_mock.create_failed_run_samples = True
90106
error_msg = 'error'

0 commit comments

Comments
 (0)