Skip to content

Commit 4af845f

Browse files
author
Balazs Gibizer
committed
Log which instance event was timed out
Nova compute logs what external events it waits for and logs if timeout happens but it is not easy to figure out which even are timed out if nova waits for multiple events, i.e.: there are multiple ports to be plugged. This patch moves the warning log into wait_for_instance_event itself and extends the log with the name of the event nova waited for when the timeout happens. Change-Id: I91c42af8226dc8607d86c52413924351ba0586ac
1 parent d630615 commit 4af845f

File tree

3 files changed

+246
-34
lines changed

3 files changed

+246
-34
lines changed

nova/compute/manager.py

Lines changed: 101 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,79 @@ def exit_wait_early(self, events):
402402
def _default_error_callback(self, event_name, instance):
403403
raise exception.NovaException(_('Instance event failed'))
404404

405+
class _InstanceEvent:
406+
EXPECTED = "expected"
407+
WAITING = "waiting"
408+
RECEIVED = "received"
409+
RECEIVED_EARLY = "received early"
410+
TIMED_OUT = "timed out"
411+
RECEIVED_NOT_PROCESSED = "received but not processed"
412+
413+
def __init__(self, name: str, event: eventlet.event.Event) -> None:
414+
self.name = name
415+
self.event = event
416+
self.status = self.EXPECTED
417+
self.wait_time = None
418+
419+
def mark_as_received_early(self) -> None:
420+
self.status = self.RECEIVED_EARLY
421+
422+
def is_received_early(self) -> bool:
423+
return self.status == self.RECEIVED_EARLY
424+
425+
def _update_status_no_wait(self):
426+
if self.status == self.EXPECTED and self.event.ready():
427+
self.status = self.RECEIVED_NOT_PROCESSED
428+
429+
def wait(self) -> 'objects.InstanceExternalEvent':
430+
self.status = self.WAITING
431+
try:
432+
with timeutils.StopWatch() as sw:
433+
instance_event = self.event.wait()
434+
except eventlet.timeout.Timeout:
435+
self.status = self.TIMED_OUT
436+
self.wait_time = sw.elapsed()
437+
438+
raise
439+
440+
self.status = self.RECEIVED
441+
self.wait_time = sw.elapsed()
442+
return instance_event
443+
444+
def __str__(self) -> str:
445+
self._update_status_no_wait()
446+
if self.status == self.EXPECTED:
447+
return f"{self.name}: expected but not received"
448+
if self.status == self.RECEIVED:
449+
return (
450+
f"{self.name}: received after waiting "
451+
f"{self.wait_time:.2f} seconds")
452+
if self.status == self.TIMED_OUT:
453+
return (
454+
f"{self.name}: timed out after "
455+
f"{self.wait_time:.2f} seconds")
456+
return f"{self.name}: {self.status}"
457+
458+
@staticmethod
459+
def _wait_for_instance_events(
460+
instance: 'objects.Instance',
461+
events: dict,
462+
error_callback: ty.Callable,
463+
) -> None:
464+
for event_name, event in events.items():
465+
if event.is_received_early():
466+
continue
467+
else:
468+
actual_event = event.wait()
469+
if actual_event.status == 'completed':
470+
continue
471+
# If we get here, we have an event that was not completed,
472+
# nor skipped via exit_wait_early(). Decide whether to
473+
# keep waiting by calling the error_callback() hook.
474+
decision = error_callback(event_name, instance)
475+
if decision is False:
476+
break
477+
405478
@contextlib.contextmanager
406479
def wait_for_instance_event(self, instance, event_names, deadline=300,
407480
error_callback=None):
@@ -454,9 +527,10 @@ def wait_for_instance_event(self, instance, event_names, deadline=300,
454527
name, tag = event_name
455528
event_name = objects.InstanceExternalEvent.make_key(name, tag)
456529
try:
457-
events[event_name] = (
530+
event = (
458531
self._compute.instance_events.prepare_for_instance_event(
459532
instance, name, tag))
533+
events[event_name] = self._InstanceEvent(event_name, event)
460534
except exception.NovaException:
461535
error_callback(event_name, instance)
462536
# NOTE(danms): Don't wait for any of the events. They
@@ -468,25 +542,35 @@ def wait_for_instance_event(self, instance, event_names, deadline=300,
468542
except self._exit_early_exc as e:
469543
early_events = set([objects.InstanceExternalEvent.make_key(n, t)
470544
for n, t in e.events])
471-
else:
472-
early_events = set([])
545+
546+
# If there are expected events that received early, mark them,
547+
# so they won't be waited for later
548+
for early_event_name in early_events:
549+
if early_event_name in events:
550+
events[early_event_name].mark_as_received_early()
473551

474552
sw = timeutils.StopWatch()
475553
sw.start()
476-
with eventlet.timeout.Timeout(deadline):
477-
for event_name, event in events.items():
478-
if event_name in early_events:
479-
continue
480-
else:
481-
actual_event = event.wait()
482-
if actual_event.status == 'completed':
483-
continue
484-
# If we get here, we have an event that was not completed,
485-
# nor skipped via exit_wait_early(). Decide whether to
486-
# keep waiting by calling the error_callback() hook.
487-
decision = error_callback(event_name, instance)
488-
if decision is False:
489-
break
554+
try:
555+
with eventlet.timeout.Timeout(deadline):
556+
self._wait_for_instance_events(
557+
instance, events, error_callback)
558+
except eventlet.timeout.Timeout:
559+
LOG.warning(
560+
'Timeout waiting for %(events)s for instance with '
561+
'vm_state %(vm_state)s and task_state %(task_state)s. '
562+
'Event states are: %(event_states)s',
563+
{
564+
'events': list(events.keys()),
565+
'vm_state': instance.vm_state,
566+
'task_state': instance.task_state,
567+
'event_states':
568+
', '.join([str(event) for event in events.values()]),
569+
},
570+
instance=instance)
571+
572+
raise
573+
490574
LOG.debug('Instance event wait completed in %i seconds for %s',
491575
sw.elapsed(),
492576
','.join(x[0] for x in event_names),

nova/tests/unit/compute/test_virtapi.py

Lines changed: 142 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import collections
1616

17+
import eventlet.timeout
1718
import mock
1819
import os_traits
1920
from oslo_utils.fixture import uuidsentinel as uuids
@@ -187,15 +188,150 @@ def do_test():
187188
do_test()
188189

189190
def test_wait_for_instance_event_timeout(self):
191+
instance = mock.Mock()
192+
instance.vm_state = mock.sentinel.vm_state
193+
instance.task_state = mock.sentinel.task_state
194+
195+
mock_log = mock.Mock()
196+
197+
@mock.patch.object(compute_manager, 'LOG', new=mock_log)
190198
@mock.patch.object(self.virtapi._compute, '_event_waiter',
191-
side_effect=test.TestingException())
192-
@mock.patch('eventlet.timeout.Timeout')
193-
def do_test(mock_timeout, mock_waiter):
194-
with self.virtapi.wait_for_instance_event('instance',
195-
[('foo', 'bar')]):
199+
side_effect=eventlet.timeout.Timeout())
200+
def do_test(mock_waiter):
201+
with self.virtapi.wait_for_instance_event(
202+
instance, [('foo', 'bar')]):
196203
pass
197204

198-
self.assertRaises(test.TestingException, do_test)
205+
self.assertRaises(eventlet.timeout.Timeout, do_test)
206+
mock_log.warning.assert_called_once_with(
207+
'Timeout waiting for %(events)s for instance with vm_state '
208+
'%(vm_state)s and task_state %(task_state)s. '
209+
'Event states are: %(event_states)s',
210+
{
211+
'events': ['foo-bar'],
212+
'vm_state': mock.sentinel.vm_state,
213+
'task_state': mock.sentinel.task_state,
214+
'event_states':
215+
'foo-bar: timed out after 0.00 seconds',
216+
},
217+
instance=instance
218+
)
219+
220+
def test_wait_for_instance_event_one_received_one_timed_out(self):
221+
instance = mock.Mock()
222+
instance.vm_state = mock.sentinel.vm_state
223+
instance.task_state = mock.sentinel.task_state
224+
225+
mock_log = mock.Mock()
226+
227+
calls = []
228+
229+
def fake_event_waiter(*args, **kwargs):
230+
calls.append((args, kwargs))
231+
if len(calls) == 1:
232+
event = mock.Mock(status="completed")
233+
return event
234+
else:
235+
raise eventlet.timeout.Timeout()
236+
237+
@mock.patch.object(compute_manager, 'LOG', new=mock_log)
238+
@mock.patch.object(self.virtapi._compute, '_event_waiter',
239+
side_effect=fake_event_waiter)
240+
def do_test(mock_waiter):
241+
with self.virtapi.wait_for_instance_event(
242+
instance, [('foo', 'bar'), ('missing', 'event')]):
243+
pass
244+
245+
self.assertRaises(eventlet.timeout.Timeout, do_test)
246+
mock_log.warning.assert_called_once_with(
247+
'Timeout waiting for %(events)s for instance with vm_state '
248+
'%(vm_state)s and task_state %(task_state)s. '
249+
'Event states are: %(event_states)s',
250+
{
251+
'events': ['foo-bar', 'missing-event'],
252+
'vm_state': mock.sentinel.vm_state,
253+
'task_state': mock.sentinel.task_state,
254+
'event_states':
255+
'foo-bar: received after waiting 0.00 seconds, '
256+
'missing-event: timed out after 0.00 seconds',
257+
},
258+
instance=instance
259+
)
260+
261+
def test_wait_for_instance_event_multiple_events(self):
262+
instance = mock.Mock()
263+
instance.vm_state = mock.sentinel.vm_state
264+
instance.task_state = mock.sentinel.task_state
265+
266+
mock_log = mock.Mock()
267+
268+
calls = []
269+
270+
def fake_event_waiter(*args, **kwargs):
271+
calls.append((args, kwargs))
272+
if len(calls) == 1:
273+
event = mock.Mock(status="completed")
274+
return event
275+
else:
276+
raise eventlet.timeout.Timeout()
277+
278+
def fake_prepare_for_instance_event(instance, name, tag):
279+
m = mock.MagicMock()
280+
m.instance = instance
281+
m.name = name
282+
m.tag = tag
283+
m.event_name = '%s-%s' % (name, tag)
284+
m.wait.side_effect = fake_event_waiter
285+
print(name, tag)
286+
if name == 'received-but-not-waited':
287+
m.ready.return_value = True
288+
if name == 'missing-but-not-waited':
289+
m.ready.return_value = False
290+
return m
291+
292+
self.virtapi._compute.instance_events.prepare_for_instance_event.\
293+
side_effect = fake_prepare_for_instance_event
294+
295+
@mock.patch.object(compute_manager, 'LOG', new=mock_log)
296+
def do_test():
297+
with self.virtapi.wait_for_instance_event(
298+
instance,
299+
[
300+
('received', 'event'),
301+
('early', 'event'),
302+
('missing', 'event'),
303+
('received-but-not-waited', 'event'),
304+
('missing-but-not-waited', 'event'),
305+
]
306+
):
307+
self.virtapi.exit_wait_early([('early', 'event')])
308+
309+
self.assertRaises(eventlet.timeout.Timeout, do_test)
310+
mock_log.warning.assert_called_once_with(
311+
'Timeout waiting for %(events)s for instance with vm_state '
312+
'%(vm_state)s and task_state %(task_state)s. '
313+
'Event states are: %(event_states)s',
314+
{
315+
'events':
316+
[
317+
'received-event',
318+
'early-event',
319+
'missing-event',
320+
'received-but-not-waited-event',
321+
'missing-but-not-waited-event'
322+
],
323+
'vm_state': mock.sentinel.vm_state,
324+
'task_state': mock.sentinel.task_state,
325+
'event_states':
326+
'received-event: received after waiting 0.00 seconds, '
327+
'early-event: received early, '
328+
'missing-event: timed out after 0.00 seconds, '
329+
'received-but-not-waited-event: received but not '
330+
'processed, '
331+
'missing-but-not-waited-event: expected but not received'
332+
},
333+
instance=instance
334+
)
199335

200336
def test_wait_for_instance_event_exit_early(self):
201337
# Wait for two events, exit early skipping one.

nova/virt/libvirt/driver.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7268,17 +7268,9 @@ def _create_guest_with_network(
72687268
pause=pause, power_on=power_on,
72697269
post_xml_callback=post_xml_callback)
72707270
except eventlet.timeout.Timeout:
7271-
# We never heard from Neutron
7272-
LOG.warning(
7273-
'Timeout waiting for %(events)s for instance with '
7274-
'vm_state %(vm_state)s and task_state %(task_state)s',
7275-
{
7276-
'events': events,
7277-
'vm_state': instance.vm_state,
7278-
'task_state': instance.task_state,
7279-
},
7280-
instance=instance)
7281-
7271+
# We did not receive all expected events from Neutron, a warning
7272+
# has already been logged by wait_for_instance_event, but we need
7273+
# to decide if the issue is fatal.
72827274
if CONF.vif_plugging_is_fatal:
72837275
# NOTE(stephenfin): don't worry, guest will be in scope since
72847276
# we can only hit this branch if the VIF plug timed out

0 commit comments

Comments
 (0)