Skip to content

Commit 0807b7a

Browse files
committed
Enable cpus when an instance is spawning
With this patch, cores are automatically powered down or up when an instance is stopped or started. Also, by default, dedicated cores are now set to powersave or taken offline when the compute service starts. Implements: blueprint libvirt-cpu-state-mgmt Change-Id: Id645fd1ba909683af903f3b8f11c7f06db3401cb
1 parent 96f9518 commit 0807b7a

File tree

9 files changed

+625
-0
lines changed

9 files changed

+625
-0
lines changed

nova/conf/libvirt.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,6 +1482,11 @@
14821482
cfg.BoolOpt('cpu_power_management',
14831483
default=False,
14841484
help='Use libvirt to manage CPU cores performance.'),
1485+
cfg.StrOpt('cpu_power_management_strategy',
1486+
choices=['cpu_state', 'governor'],
1487+
default='cpu_state',
1488+
help='Tuning strategy to reduce CPU power consumption when '
1489+
'unused'),
14851490
cfg.StrOpt('cpu_power_governor_low',
14861491
default='powersave',
14871492
help='Governor to use in order '

nova/tests/fixtures/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from .cinder import CinderFixture # noqa: F401
1717
from .conf import ConfFixture # noqa: F401, F403
1818
from .cyborg import CyborgFixture # noqa: F401
19+
from .filesystem import SysFileSystemFixture # noqa: F401
20+
from .filesystem import TempFileSystemFixture # noqa: F401
1921
from .glance import GlanceFixture # noqa: F401
2022
from .libvirt import LibvirtFixture # noqa: F401
2123
from .libvirt_imagebackend import LibvirtImageBackendFixture # noqa: F401

nova/tests/fixtures/filesystem.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
2+
# not use this file except in compliance with the License. You may obtain
3+
# a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10+
# License for the specific language governing permissions and limitations
11+
# under the License.
12+
13+
import os
14+
import shutil
15+
import tempfile
16+
from unittest import mock
17+
18+
import fixtures
19+
20+
from nova import filesystem
21+
from nova.virt.libvirt.cpu import core
22+
23+
24+
SYS = 'sys'
25+
26+
27+
class TempFileSystemFixture(fixtures.Fixture):
    """Provide a throwaway directory that stands in for the / filesystem.

    The ``tempfile.TemporaryDirectory`` object is exposed as
    ``self.temp_dir`` and its ``cleanup()`` is registered as a fixture
    cleanup.
    """

    def _setUp(self):
        fake_root = tempfile.TemporaryDirectory(prefix='fake_fs')
        self.temp_dir = fake_root
        # NOTE(sbauza): cleanup() does not ignore I/O disk errors, so it may
        # raise here. If that becomes a problem in our CI jobs, the
        # recommended solution is to call shutil.rmtree with ignore_errors
        # set to True instead of cleanup() (or to wait until the minimum
        # python version is 3.10, where TemporaryDirectory provides the
        # ignore_cleanup_errors parameter).
        self.addCleanup(fake_root.cleanup)
40+
41+
class SysFileSystemFixture(TempFileSystemFixture):
    """Build a fake /sys tree inside the temporary filesystem.

    While the fixture is active, ``nova.filesystem.SYS`` and the
    ``AVAILABLE_PATH`` / ``CPU_PATH_TEMPLATE`` constants of
    ``nova.virt.libvirt.cpu.core`` are patched to point below the fake
    tree. One ``cpuN/cpufreq`` directory is created per supported CPU, with
    a ``scaling_governor`` file containing ``powersave``.

    :param cpus_supported: number of CPUs to create; any falsy value
        (``None`` or ``0``) falls back to 10.
    """

    def __init__(self, cpus_supported=None):
        self.cpus_supported = cpus_supported if cpus_supported else 10

    def _start_patch(self, target, value):
        """Patch ``target`` with ``value`` and undo it at fixture cleanup.

        :returns: whatever the started patcher returns.
        """
        patcher = mock.patch(
            target,
            new_callable=mock.PropertyMock(return_value=value))
        patched = patcher.start()
        self.addCleanup(patcher.stop)
        return patched

    def _setUp(self):
        super()._setUp()
        self.sys_path = os.path.join(self.temp_dir.name, SYS)
        self.addCleanup(shutil.rmtree, self.sys_path, ignore_errors=True)

        # Redirect every /sys related constant to the fake tree.
        self.sys_mock = self._start_patch(
            'nova.filesystem.SYS', self.sys_path)
        self.avail_path_mock = self._start_patch(
            'nova.virt.libvirt.cpu.core.AVAILABLE_PATH',
            os.path.join(self.sys_path, 'devices/system/cpu/present'))
        self.cpu_path_mock = self._start_patch(
            'nova.virt.libvirt.cpu.core.CPU_PATH_TEMPLATE',
            os.path.join(self.sys_path, 'devices/system/cpu/cpu%(core)s'))

        # Populate one cpufreq directory per CPU, all starting as powersave.
        for cpu_nr in range(self.cpus_supported):
            cpu_dir = self.cpu_path_mock % {'core': cpu_nr}
            os.makedirs(os.path.join(cpu_dir, 'cpufreq'))
            filesystem.write_sys(
                os.path.join(cpu_dir, 'cpufreq/scaling_governor'),
                data='powersave')

        # Advertise the whole 0..N-1 range as present.
        filesystem.write_sys(core.AVAILABLE_PATH,
                             f'0-{self.cpus_supported - 1}')
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
2+
# not use this file except in compliance with the License. You may obtain
3+
# a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10+
# License for the specific language governing permissions and limitations
11+
# under the License.
12+
13+
from unittest import mock
14+
15+
import fixtures
16+
17+
from nova import context as nova_context
18+
from nova import exception
19+
from nova import objects
20+
from nova.tests import fixtures as nova_fixtures
21+
from nova.tests.fixtures import libvirt as fakelibvirt
22+
from nova.tests.functional.libvirt import base
23+
from nova.virt import hardware
24+
from nova.virt.libvirt import cpu
25+
26+
27+
class PowerManagementTestsBase(base.ServersTestBase):
    """Shared setup and assertion helpers for CPU power management tests."""

    ADDITIONAL_FILTERS = ['NUMATopologyFilter']

    ADMIN_API = True

    def setUp(self):
        super(PowerManagementTestsBase, self).setUp()

        self.ctxt = nova_context.get_admin_context()

        # Mock the 'NUMATopologyFilter' filter, as most tests need to inspect
        # this
        host_manager = self.scheduler.manager.host_manager
        numa_filter_class = host_manager.filter_cls_map['NUMATopologyFilter']
        host_pass_mock = mock.Mock(wraps=numa_filter_class().host_passes)
        _p = mock.patch('nova.scheduler.filters'
                        '.numa_topology_filter.NUMATopologyFilter.host_passes',
                        side_effect=host_pass_mock)
        self.mock_filter = _p.start()
        self.addCleanup(_p.stop)

        # for the sake of resizing, we need to patch the two methods below
        self.useFixture(fixtures.MockPatch(
            'nova.virt.libvirt.LibvirtDriver._get_instance_disk_info',
            return_value=[]))
        self.useFixture(fixtures.MockPatch('os.rename'))

        self.useFixture(nova_fixtures.PrivsepFixture())

        # Defining the main flavor for 4 vCPUs all pinned
        self.extra_spec = {
            'hw:cpu_policy': 'dedicated',
            'hw:cpu_thread_policy': 'prefer',
        }
        self.pcpu_flavor_id = self._create_flavor(
            vcpu=4, extra_spec=self.extra_spec)

    def _assert_server_cpus_state(self, server, expected='online'):
        """Assert the state of every host CPU the server is pinned to.

        :param server: server dict; its ``'id'`` is used to load the instance
        :param expected: one of the states accepted by
            ``_assert_cpu_set_state``
        :returns: the instance's ``numa_topology.cpu_pinning``
        """
        inst = objects.Instance.get_by_uuid(self.ctxt, server['id'])
        if not inst.numa_topology:
            self.fail('Instance should have a NUMA topology in order to know '
                      'its physical CPUs')
        instance_pcpus = inst.numa_topology.cpu_pinning
        self._assert_cpu_set_state(instance_pcpus, expected=expected)
        return instance_pcpus

    def _assert_cpu_set_state(self, cpu_set, expected='online'):
        """Assert that every core in ``cpu_set`` is in the expected state.

        :param cpu_set: iterable of host CPU ids
        :param expected: ``'online'`` or ``'offline'`` to check the core's
            online flag, ``'powersave'`` or ``'performance'`` to check its
            governor
        """
        # Reject unknown states up front: without this, a typo in `expected`
        # (or an empty cpu_set) would make the helper pass without asserting
        # anything.
        known_states = ('online', 'offline', 'powersave', 'performance')
        if expected not in known_states:
            self.fail(f'Unknown expected CPU state {expected!r}, '
                      f'must be one of {known_states}')

        for i in cpu_set:
            core = cpu.Core(i)
            if expected == 'online':
                self.assertTrue(core.online, f'{i} is not online')
            elif expected == 'offline':
                self.assertFalse(core.online, f'{i} is online')
            else:
                # 'powersave' and 'performance' are governor names.
                self.assertEqual(expected, core.governor)
85+
86+
87+
class PowerManagementTests(PowerManagementTestsBase):
    """Single-host scenarios: 9 dedicated cores plus 1 left to the OS."""

    def setUp(self):
        super().setUp()

        self.useFixture(nova_fixtures.SysFileSystemFixture())

        # CPUs 1-9 are the ones that can be pinned; nothing is shared.
        self.flags(cpu_dedicated_set='1-9', cpu_shared_set=None,
                   group='compute')
        self.flags(vcpu_pin_set=None)
        self.flags(cpu_power_management=True, group='libvirt')

        self.flags(allow_resize_to_same_host=True)
        self.host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1,
                                              cpu_cores=5, cpu_threads=2)
        self.compute1 = self.start_compute(host_info=self.host_info,
                                           hostname='compute1')

        # Starting the compute service shuts every dedicated core down;
        # check that this really happened.
        dedicated_cpus = hardware.get_cpu_dedicated_set()
        self._assert_cpu_set_state(dedicated_cpus, expected='offline')

    def test_hardstop_compute_service_if_wrong_opt(self):
        self.flags(cpu_dedicated_set=None, cpu_shared_set=None,
                   group='compute')
        self.flags(vcpu_pin_set=None)
        self.flags(cpu_power_management=True, group='libvirt')
        self.assertRaises(exception.InvalidConfiguration,
                          self.start_compute, host_info=self.host_info,
                          hostname='compute2')

    def test_create_server(self):
        server = self._create_server(
            flavor_id=self.pcpu_flavor_id,
            expected_state='ACTIVE')
        # The CPUs the guest is pinned to must have been brought online.
        self._assert_server_cpus_state(server, expected='online')

        # Every other dedicated CPU must have stayed offline.
        instance = objects.Instance.get_by_uuid(self.ctxt, server['id'])
        pinned_cpus = instance.numa_topology.cpu_pinning
        idle_cpus = hardware.get_cpu_dedicated_set() - pinned_cpus
        self._assert_cpu_set_state(idle_cpus, expected='offline')

    def test_stop_start_server(self):
        server = self._create_server(
            flavor_id=self.pcpu_flavor_id,
            expected_state='ACTIVE')

        # Stopping the guest powers its pinned CPUs down...
        server = self._stop_server(server)
        self._assert_server_cpus_state(server, expected='offline')

        # ...and starting it again brings them back.
        server = self._start_server(server)
        self._assert_server_cpus_state(server, expected='online')

    def test_resize(self):
        server = self._create_server(
            flavor_id=self.pcpu_flavor_id,
            expected_state='ACTIVE')
        original_pcpus = self._assert_server_cpus_state(
            server, expected='online')

        bigger_flavor_id = self._create_flavor(
            vcpu=5, extra_spec=self.extra_spec)
        self._resize_server(server, bigger_flavor_id)
        resized_pcpus = self._assert_server_cpus_state(
            server, expected='online')
        # Even if the resize is not confirmed yet, the original guest is now
        # destroyed so its cores are offline.
        self._assert_cpu_set_state(original_pcpus, expected='offline')

        # Reverting the resize brings the original CPUs back online, while
        # the cores used by the resized guest go back offline.
        self._revert_resize(server)
        self._assert_cpu_set_state(original_pcpus, expected='online')
        self._assert_cpu_set_state(resized_pcpus, expected='offline')

    def test_changing_strategy_fails(self):
        # As a reminder, all cores were shut down during setUp. Switch the
        # strategy and then restart the service.
        self.flags(cpu_power_management_strategy='governor', group='libvirt')
        # This is not possible as we would have offline CPUs.
        self.assertRaises(exception.InvalidConfiguration,
                          self.restart_compute_service, hostname='compute1')
177+
178+
179+
class PowerManagementTestsGovernor(PowerManagementTestsBase):
    """Test suite for specific governor usage (same 10-core host)."""

    def setUp(self):
        super().setUp()

        self.useFixture(nova_fixtures.SysFileSystemFixture())

        # CPUs 1-9 are the ones that can be pinned; nothing is shared.
        self.flags(cpu_dedicated_set='1-9', cpu_shared_set=None,
                   group='compute')
        self.flags(vcpu_pin_set=None)
        self.flags(cpu_power_management=True, group='libvirt')
        self.flags(cpu_power_management_strategy='governor', group='libvirt')

        self.flags(allow_resize_to_same_host=True)
        self.host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1,
                                              cpu_cores=5, cpu_threads=2)
        self.compute1 = self.start_compute(host_info=self.host_info,
                                           hostname='compute1')

    def test_create(self):
        # With the governor strategy, cores stay online but run with a
        # powersave governor.
        dedicated_cpus = hardware.get_cpu_dedicated_set()
        self._assert_cpu_set_state(dedicated_cpus, expected='powersave')

        # Now, start an instance.
        server = self._create_server(
            flavor_id=self.pcpu_flavor_id,
            expected_state='ACTIVE')
        # Once pinned cores are in use, their governor is performance.
        self._assert_server_cpus_state(server, expected='performance')

    def test_changing_strategy_fails(self):
        # Arbitrarily set one core's governor to performance...
        cpu.Core(1).set_high_governor()
        # ...and then forget about it while changing the strategy.
        self.flags(cpu_power_management_strategy='cpu_state', group='libvirt')
        # This time, this wouldn't be acceptable as some core would have a
        # different performance while Nova would only online/offline it.
        self.assertRaises(exception.InvalidConfiguration,
                          self.restart_compute_service, hostname='compute1')
222+
223+
224+
class PowerManagementMixedInstances(PowerManagementTestsBase):
    """Test suite for a single host with 6 dedicated cores, 3 shared and one
    OS-restricted.
    """

    def setUp(self):
        super().setUp()

        self.useFixture(nova_fixtures.SysFileSystemFixture())

        # Define 6 dedicated CPUs, deliberately not one contiguous range.
        self.flags(cpu_dedicated_set='1-3,5-7', cpu_shared_set='4,8-9',
                   group='compute')
        self.flags(vcpu_pin_set=None)
        self.flags(cpu_power_management=True, group='libvirt')

        self.host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1,
                                              cpu_cores=5, cpu_threads=2)
        self.compute1 = self.start_compute(host_info=self.host_info,
                                           hostname='compute1')

        # Make sure only the 6 dedicated cores are offline now...
        dedicated_cpus = hardware.get_cpu_dedicated_set()
        self._assert_cpu_set_state(dedicated_cpus, expected='offline')

        # ...while cores 4 and 8-9 should be online.
        shared_cpus = {4, 8, 9}
        self._assert_cpu_set_state(shared_cpus, expected='online')

    def test_standard_server_works_and_passes(self):
        shared_cpus = {4, 8, 9}

        std_flavor_id = self._create_flavor(vcpu=2)
        self._create_server(flavor_id=std_flavor_id, expected_state='ACTIVE')

        # Since this is an instance with floating vCPUs on the shared set, we
        # can only look up the host CPUs and see they haven't changed state.
        dedicated_cpus = hardware.get_cpu_dedicated_set()
        self._assert_cpu_set_state(dedicated_cpus, expected='offline')
        self._assert_cpu_set_state(shared_cpus, expected='online')

        # We can now try to boot an instance with pinned CPUs to test the mix
        pinned_server = self._create_server(
            flavor_id=self.pcpu_flavor_id,
            expected_state='ACTIVE')
        # Its CPUs are now online...
        self._assert_server_cpus_state(pinned_server, expected='online')
        # ...and the shared set is left unchanged.
        self._assert_cpu_set_state(shared_cpus, expected='online')

0 commit comments

Comments
 (0)