Skip to content

Commit 4ae5b61

Browse files
Zuul authored and openstack-gerrit committed
Merge "Make libvirt able to trigger a backend image copy when needed"
2 parents 6f62db9 + 07025ab commit 4ae5b61

File tree

4 files changed

+360
-1
lines changed

4 files changed

+360
-1
lines changed

nova/conf/libvirt.py

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -940,6 +940,50 @@
940940
cfg.StrOpt('images_rbd_ceph_conf',
941941
default='', # default determined by librados
942942
help='Path to the ceph configuration file to use'),
943+
cfg.StrOpt('images_rbd_glance_store_name',
944+
default='',
945+
help="""
946+
The name of the Glance store that represents the rbd cluster in use by
947+
this node. If set, this will allow Nova to request that Glance copy an
948+
image from an existing non-local store into the one named by this option
949+
before booting so that proper Copy-on-Write behavior is maintained.
950+
951+
Related options:
952+
953+
* images_type - must be set to ``rbd``
954+
* images_rbd_glance_copy_poll_interval - controls the status poll frequency
955+
* images_rbd_glance_copy_timeout - controls the overall copy timeout
956+
"""),
957+
cfg.IntOpt('images_rbd_glance_copy_poll_interval',
958+
default=15,
959+
help="""
960+
The interval in seconds with which to poll Glance after asking for it
961+
to copy an image to the local rbd store. This affects how often we ask
962+
Glance to report on copy completion, and thus should be short enough that
963+
we notice quickly, but not too aggressive that we generate undue load on
964+
the Glance server.
965+
966+
Related options:
967+
968+
* images_type - must be set to ``rbd``
969+
* images_rbd_glance_store_name - must be set to a store name
970+
"""),
971+
cfg.IntOpt('images_rbd_glance_copy_timeout',
972+
default=600,
973+
help="""
974+
The overall maximum time we will wait for Glance to complete an image
975+
copy to our local rbd store. This should be long enough to allow large
976+
images to be copied over the network link between our local store and the
977+
one where images typically reside. The downside of setting this too long
978+
is just to catch the case where the image copy is stalled or proceeding too
979+
slowly to be useful. Actual errors will be reported by Glance and noticed
980+
according to the poll interval.
981+
982+
Related options:
983+
* images_type - must be set to ``rbd``
984+
* images_rbd_glance_store_name - must be set to a store name
985+
* images_rbd_glance_copy_poll_interval - controls the failure time-to-notice
986+
"""),
943987
cfg.StrOpt('hw_disk_discard',
944988
choices=('ignore', 'unmap'),
945989
help="""

nova/tests/unit/virt/libvirt/test_imagebackend.py

Lines changed: 225 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
import mock
2626
from oslo_concurrency import lockutils
2727
from oslo_config import fixture as config_fixture
28+
from oslo_service import loopingcall
2829
from oslo_utils import imageutils
2930
from oslo_utils import units
3031
from oslo_utils import uuidutils
@@ -1768,6 +1769,230 @@ def test_cleanup_direct_snapshot_destroy_volume(self):
17681769
mock_destroy.assert_called_once_with(image.rbd_name,
17691770
pool=image.driver.pool)
17701771

1772+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1773+
def test_copy_to_store(self, mock_imgapi):
1774+
# Test copy_to_store() happy path where we ask for the image
1775+
# to be copied, it goes into progress and then completes.
1776+
self.flags(images_rbd_glance_copy_poll_interval=0,
1777+
group='libvirt')
1778+
self.flags(images_rbd_glance_store_name='store',
1779+
group='libvirt')
1780+
image = self.image_class(self.INSTANCE, self.NAME)
1781+
mock_imgapi.get.side_effect = [
1782+
# Simulate a race between starting the copy and the first poll
1783+
{'stores': []},
1784+
# Second poll shows it in progress
1785+
{'os_glance_importing_to_stores': ['store'],
1786+
'stores': []},
1787+
# Third poll shows it has also been copied to a non-local store
1788+
{'os_glance_importing_to_stores': ['store'],
1789+
'stores': ['other']},
1790+
# Should-be-last poll shows it complete
1791+
{'os_glance_importing_to_stores': [],
1792+
'stores': ['other', 'store']},
1793+
]
1794+
image.copy_to_store(self.CONTEXT, {'id': 'foo'})
1795+
mock_imgapi.copy_image_to_store.assert_called_once_with(
1796+
self.CONTEXT, 'foo', 'store')
1797+
self.assertEqual(4, mock_imgapi.get.call_count)
1798+
1799+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1800+
def test_copy_to_store_race_with_existing(self, mock_imgapi):
1801+
# Test copy_to_store() where we race to ask Glance to do the
1802+
# copy with another node. One of us will get a BadRequest, which
1803+
# should not cause us to fail. If our desired store is now
1804+
# in progress, continue to wait like we would have if we had
1805+
# won the race.
1806+
self.flags(images_rbd_glance_copy_poll_interval=0,
1807+
group='libvirt')
1808+
self.flags(images_rbd_glance_store_name='store',
1809+
group='libvirt')
1810+
image = self.image_class(self.INSTANCE, self.NAME)
1811+
1812+
mock_imgapi.copy_image_to_store.side_effect = (
1813+
exception.ImageBadRequest(image_id='foo',
1814+
response='already in progress'))
1815+
# Make the first poll indicate that the image has already
1816+
# been copied
1817+
mock_imgapi.get.return_value = {'stores': ['store', 'other']}
1818+
1819+
# Despite the (expected) exception from the copy, we should
1820+
# not raise here if the subsequent poll works.
1821+
image.copy_to_store(self.CONTEXT, {'id': 'foo'})
1822+
1823+
mock_imgapi.get.assert_called_once_with(self.CONTEXT,
1824+
'foo',
1825+
include_locations=True)
1826+
mock_imgapi.copy_image_to_store.assert_called_once_with(
1827+
self.CONTEXT, 'foo', 'store')
1828+
1829+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1830+
def test_copy_to_store_import_impossible(self, mock_imgapi):
1831+
# Test copy_to_store() where Glance tells us that the image
1832+
# is not copy-able for some reason (like it is not active yet
1833+
# or some other workflow reason).
1834+
image = self.image_class(self.INSTANCE, self.NAME)
1835+
mock_imgapi.copy_image_to_store.side_effect = (
1836+
exception.ImageImportImpossible(image_id='foo',
1837+
reason='because tests'))
1838+
self.assertRaises(exception.ImageUnacceptable,
1839+
image.copy_to_store,
1840+
self.CONTEXT, {'id': 'foo'})
1841+
1842+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1843+
def test_copy_to_store_import_failed_other_reason(self, mock_imgapi):
1844+
# Test copy_to_store() where some unexpected failure gets raised.
1845+
# We should bubble that up so it gets all the way back to the caller
1846+
# of the clone() itself, which can handle it independent of one of
1847+
# the image-specific exceptions.
1848+
image = self.image_class(self.INSTANCE, self.NAME)
1849+
mock_imgapi.copy_image_to_store.side_effect = test.TestingException
1850+
# Make sure any other exception makes it through, as those are already
1851+
# expected failures by the callers of the imagebackend code.
1852+
self.assertRaises(test.TestingException,
1853+
image.copy_to_store,
1854+
self.CONTEXT, {'id': 'foo'})
1855+
1856+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1857+
def test_copy_to_store_import_failed_in_progress(self, mock_imgapi):
1858+
# Test copy_to_store() in the situation where we ask for the copy,
1859+
# things start to look good (in progress) and later get reported
1860+
# as failed.
1861+
self.flags(images_rbd_glance_copy_poll_interval=0,
1862+
group='libvirt')
1863+
self.flags(images_rbd_glance_store_name='store',
1864+
group='libvirt')
1865+
image = self.image_class(self.INSTANCE, self.NAME)
1866+
mock_imgapi.get.side_effect = [
1867+
# First poll shows it in progress
1868+
{'os_glance_importing_to_stores': ['store'],
1869+
'stores': []},
1870+
# Second poll shows it failed
1871+
{'os_glance_failed_import': ['store'],
1872+
'stores': []},
1873+
]
1874+
exc = self.assertRaises(exception.ImageUnacceptable,
1875+
image.copy_to_store,
1876+
self.CONTEXT, {'id': 'foo'})
1877+
self.assertIn('unsuccessful because', str(exc))
1878+
1879+
@mock.patch.object(loopingcall.FixedIntervalWithTimeoutLoopingCall,
1880+
'start')
1881+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1882+
def test_copy_to_store_import_failed_timeout(self, mock_imgapi,
1883+
mock_timer_start):
1884+
# Test copy_to_store() simulating the case where we timeout waiting
1885+
# for Glance to do the copy.
1886+
self.flags(images_rbd_glance_store_name='store',
1887+
group='libvirt')
1888+
image = self.image_class(self.INSTANCE, self.NAME)
1889+
mock_timer_start.side_effect = loopingcall.LoopingCallTimeOut()
1890+
exc = self.assertRaises(exception.ImageUnacceptable,
1891+
image.copy_to_store,
1892+
self.CONTEXT, {'id': 'foo'})
1893+
self.assertIn('timed out', str(exc))
1894+
mock_imgapi.copy_image_to_store.assert_called_once_with(
1895+
self.CONTEXT, 'foo', 'store')
1896+
1897+
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
1898+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1899+
def test_clone_copy_to_store(self, mock_imgapi, mock_driver_):
1900+
# Call image.clone() in a way that will cause it to fall through
1901+
# the locations check to the copy-to-store behavior, and assert
1902+
# that after the copy, we recurse (without becoming infinite) and
1903+
# do the check again.
1904+
self.flags(images_rbd_glance_store_name='store', group='libvirt')
1905+
fake_image = {
1906+
'id': 'foo',
1907+
'disk_format': 'raw',
1908+
'locations': ['fake'],
1909+
}
1910+
mock_imgapi.get.return_value = fake_image
1911+
mock_driver = mock_driver_.return_value
1912+
mock_driver.is_cloneable.side_effect = [False, True]
1913+
image = self.image_class(self.INSTANCE, self.NAME)
1914+
with mock.patch.object(image, 'copy_to_store') as mock_copy:
1915+
image.clone(self.CONTEXT, 'foo')
1916+
mock_copy.assert_called_once_with(self.CONTEXT, fake_image)
1917+
mock_driver.is_cloneable.assert_has_calls([
1918+
# First call is the initial check
1919+
mock.call('fake', fake_image),
1920+
# Second call with the same location must be because we
1921+
# recursed after the copy-to-store operation
1922+
mock.call('fake', fake_image)])
1923+
1924+
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
1925+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1926+
def test_clone_copy_to_store_failed(self, mock_imgapi, mock_driver_):
1927+
# Call image.clone() in a way that will cause it to fall through
1928+
# the locations check to the copy-to-store behavior, but simulate
1929+
# some situation where we didn't actually copy the image and the
1930+
# recursed check does not succeed. Assert that we do not copy again,
1931+
# nor recurse again, and raise the expected error.
1932+
self.flags(images_rbd_glance_store_name='store', group='libvirt')
1933+
fake_image = {
1934+
'id': 'foo',
1935+
'disk_format': 'raw',
1936+
'locations': ['fake'],
1937+
}
1938+
mock_imgapi.get.return_value = fake_image
1939+
mock_driver = mock_driver_.return_value
1940+
mock_driver.is_cloneable.side_effect = [False, False]
1941+
image = self.image_class(self.INSTANCE, self.NAME)
1942+
with mock.patch.object(image, 'copy_to_store') as mock_copy:
1943+
self.assertRaises(exception.ImageUnacceptable,
1944+
image.clone, self.CONTEXT, 'foo')
1945+
mock_copy.assert_called_once_with(self.CONTEXT, fake_image)
1946+
mock_driver.is_cloneable.assert_has_calls([
1947+
# First call is the initial check
1948+
mock.call('fake', fake_image),
1949+
# Second call with the same location must be because we
1950+
# recursed after the copy-to-store operation
1951+
mock.call('fake', fake_image)])
1952+
1953+
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
1954+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1955+
def test_clone_without_needed_copy(self, mock_imgapi, mock_driver_):
1956+
# Call image.clone() in a way that will cause it to pass the locations
1957+
# check the first time. Assert that we do not call copy-to-store
1958+
# nor recurse.
1959+
self.flags(images_rbd_glance_store_name='store', group='libvirt')
1960+
fake_image = {
1961+
'id': 'foo',
1962+
'disk_format': 'raw',
1963+
'locations': ['fake'],
1964+
}
1965+
mock_imgapi.get.return_value = fake_image
1966+
mock_driver = mock_driver_.return_value
1967+
mock_driver.is_cloneable.return_value = True
1968+
image = self.image_class(self.INSTANCE, self.NAME)
1969+
with mock.patch.object(image, 'copy_to_store') as mock_copy:
1970+
image.clone(self.CONTEXT, 'foo')
1971+
mock_copy.assert_not_called()
1972+
mock_driver.is_cloneable.assert_called_once_with('fake', fake_image)
1973+
1974+
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
1975+
@mock.patch('nova.virt.libvirt.imagebackend.IMAGE_API')
1976+
def test_clone_copy_not_configured(self, mock_imgapi, mock_driver_):
1977+
# Call image.clone() in a way that will cause it to fail the locations
1978+
# check the first time. Assert that if the store name is not configured
1979+
# we do not try to copy-to-store and just raise the original exception
1980+
# indicating that the image is not reachable.
1981+
fake_image = {
1982+
'id': 'foo',
1983+
'disk_format': 'raw',
1984+
'locations': ['fake'],
1985+
}
1986+
mock_imgapi.get.return_value = fake_image
1987+
mock_driver = mock_driver_.return_value
1988+
mock_driver.is_cloneable.return_value = False
1989+
image = self.image_class(self.INSTANCE, self.NAME)
1990+
with mock.patch.object(image, 'copy_to_store') as mock_copy:
1991+
self.assertRaises(exception.ImageUnacceptable,
1992+
image.clone, self.CONTEXT, 'foo')
1993+
mock_copy.assert_not_called()
1994+
mock_driver.is_cloneable.assert_called_once_with('fake', fake_image)
1995+
17711996

17721997
class PloopTestCase(_ImageTestCase, test.NoDBTestCase):
17731998
SIZE = 1024

nova/virt/libvirt/imagebackend.py

Lines changed: 78 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
from oslo_concurrency import processutils
2626
from oslo_log import log as logging
2727
from oslo_serialization import jsonutils
28+
from oslo_service import loopingcall
2829
from oslo_utils import excutils
2930
from oslo_utils import fileutils
3031
from oslo_utils import strutils
@@ -958,7 +959,77 @@ def snapshot_extract(self, target, out_format):
958959
def is_shared_block_storage():
959960
return True
960961

961-
def clone(self, context, image_id_or_uri):
962+
def copy_to_store(self, context, image_meta):
963+
store_name = CONF.libvirt.images_rbd_glance_store_name
964+
image_id = image_meta['id']
965+
try:
966+
IMAGE_API.copy_image_to_store(context, image_id, store_name)
967+
except exception.ImageBadRequest:
968+
# NOTE(danms): This means that we raced with another node to start
969+
# the copy. Fall through to polling the image for completion
970+
pass
971+
except exception.ImageImportImpossible as exc:
972+
# NOTE(danms): This means we can not do this operation at all,
973+
# so fold this into the kind of imagebackend failure that is
974+
# expected by our callers
975+
raise exception.ImageUnacceptable(image_id=image_id,
976+
reason=str(exc))
977+
978+
def _wait_for_copy():
979+
image = IMAGE_API.get(context, image_id, include_locations=True)
980+
if store_name in image.get('os_glance_failed_import', []):
981+
# Our store is reported as failed
982+
raise loopingcall.LoopingCallDone('failed import')
983+
elif (store_name not in image.get('os_glance_importing_to_stores',
984+
[]) and
985+
store_name in image['stores']):
986+
# No longer importing and our store is listed in the stores
987+
raise loopingcall.LoopingCallDone()
988+
else:
989+
LOG.debug('Glance reports copy of image %(image)s to '
990+
'rbd store %(store)s is still in progress',
991+
{'image': image_id,
992+
'store': store_name})
993+
return True
994+
995+
LOG.info('Asking glance to copy image %(image)s to our '
996+
'rbd store %(store)s',
997+
{'image': image_id,
998+
'store': store_name})
999+
1000+
timer = loopingcall.FixedIntervalWithTimeoutLoopingCall(_wait_for_copy)
1001+
1002+
# NOTE(danms): We *could* do something more complicated like try
1003+
# to scale our polling interval based on image size. The problem with
1004+
# that is that we do not get progress indication from Glance, so if
1005+
# we scale our interval to something long, and happen to poll right
1006+
# near the end of the copy, we will wait another long interval before
1007+
# realizing that the copy is complete. A simple interval per compute
1008+
# allows an operator to set this short on central/fast/inexpensive
1009+
# computes, and longer on nodes that are remote/slow/expensive across
1010+
# a slower link.
1011+
interval = CONF.libvirt.images_rbd_glance_copy_poll_interval
1012+
timeout = CONF.libvirt.images_rbd_glance_copy_timeout
1013+
try:
1014+
result = timer.start(interval=interval, timeout=timeout).wait()
1015+
except loopingcall.LoopingCallTimeOut:
1016+
raise exception.ImageUnacceptable(
1017+
image_id=image_id,
1018+
reason='Copy to store %(store)s timed out' % {
1019+
'store': store_name})
1020+
1021+
if result is not True:
1022+
raise exception.ImageUnacceptable(
1023+
image_id=image_id,
1024+
reason=('Copy to store %(store)s unsuccessful '
1025+
'because: %(reason)s') % {'store': store_name,
1026+
'reason': result})
1027+
1028+
LOG.info('Image %(image)s copied to rbd store %(store)s',
1029+
{'image': image_id,
1030+
'store': store_name})
1031+
1032+
def clone(self, context, image_id_or_uri, copy_to_store=True):
9621033
image_meta = IMAGE_API.get(context, image_id_or_uri,
9631034
include_locations=True)
9641035
locations = image_meta['locations']
@@ -975,6 +1046,12 @@ def clone(self, context, image_id_or_uri):
9751046
LOG.debug('Selected location: %(loc)s', {'loc': location})
9761047
return self.driver.clone(location, self.rbd_name)
9771048

1049+
# Not clone-able in our ceph, so try to get glance to copy it for us
1050+
# and then retry
1051+
if CONF.libvirt.images_rbd_glance_store_name and copy_to_store:
1052+
self.copy_to_store(context, image_meta)
1053+
return self.clone(context, image_id_or_uri, copy_to_store=False)
1054+
9781055
reason = _('No image locations are accessible')
9791056
raise exception.ImageUnacceptable(image_id=image_id_or_uri,
9801057
reason=reason)

0 commit comments

Comments
 (0)