
Commit 3d818c3

Make allocation candidates available for scheduler filters
This patch extends the HostState object with an allocation_candidates list populated by the scheduler manager, and changes the generic scheduler logic to allocate one of the candidates stored in the host state of the selected host. After this patch, scheduler filters can be extended to filter the allocation_candidates list of the HostState object while processing a host, restricting which candidate can be allocated if the host passes all the filters. Multiple consecutive filters can potentially remove every candidate, making the host a non-viable scheduling target.

blueprint: pci-device-tracking-in-placement
Change-Id: Id0afff271d345a94aa83fc886e9c3231c3ff2570
1 parent e96601c commit 3d818c3
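
To illustrate what this enables, here is a minimal sketch (not part of this commit) of a host filter that prunes the new HostState.allocation_candidates list. The filter name and the pruning rule are made up for illustration; the assumed candidate shape ("allocations" keyed by resource provider UUID) follows the placement allocation request format.

from nova.scheduler import filters


class ExampleCandidateFilter(filters.BaseHostFilter):
    """Hypothetical filter that removes unwanted allocation candidates."""

    # Candidates are not attached during rebuild, so this filter must not
    # run there (see the comment added to BaseHostFilter below).
    RUN_ON_REBUILD = False

    def host_passes(self, host_state, spec_obj):
        def _acceptable(candidate):
            # Made-up rule: keep candidates that allocate from at most
            # three resource providers.
            return len(candidate.get('allocations', {})) <= 3

        host_state.allocation_candidates = [
            c for c in host_state.allocation_candidates if _acceptable(c)]

        # If every candidate was removed, the host is no longer a viable
        # scheduling target for this request.
        return bool(host_state.allocation_candidates)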

File tree: 4 files changed, +872 -128 lines


nova/scheduler/filters/__init__.py

Lines changed: 3 additions & 0 deletions

@@ -28,6 +28,9 @@ class BaseHostFilter(filters.BaseFilter):
     # other parameters. We care about running policy filters (i.e.
     # ImagePropertiesFilter) but not things that check usage on the
     # existing compute node, etc.
+    # This also means that filters marked with RUN_ON_REBUILD = True cannot
+    # filter on allocation candidates or need to handle the rebuild case
+    # specially.
     RUN_ON_REBUILD = False

     def _filter_one(self, obj, spec):
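
A hedged sketch of what the new comment implies for a filter that keeps RUN_ON_REBUILD = True: during rebuild no candidates are attached to the HostState, so such a filter either has to ignore allocation candidates entirely or special-case the empty list. The filter name and logic below are hypothetical.

from nova.scheduler import filters


class ExampleRebuildAwareFilter(filters.BaseHostFilter):
    """Hypothetical filter that still runs on rebuild."""

    RUN_ON_REBUILD = True

    def host_passes(self, host_state, spec_obj):
        if not host_state.allocation_candidates:
            # Rebuild case: no candidates were attached by the scheduler
            # manager, so decide without them (here: simply pass the host).
            # Note: a prior filter that emptied the list should also have
            # returned False, so an empty list here normally means rebuild.
            return True
        # Boot/move case: candidate-based filtering is possible.
        host_state.allocation_candidates = [
            c for c in host_state.allocation_candidates
            if 'allocations' in c]  # made-up predicate
        return bool(host_state.allocation_candidates)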

nova/scheduler/host_manager.py

Lines changed: 17 additions & 7 deletions

@@ -153,6 +153,8 @@ def __init__(self, host, node, cell_uuid):

         self.updated = None

+        self.allocation_candidates = []
+
     def update(self, compute=None, service=None, aggregates=None,
                inst_dict=None):
         """Update all information about a host."""
@@ -314,13 +316,21 @@ def _locked_consume_from_request(self, spec_obj):
         self.num_io_ops += 1

     def __repr__(self):
-        return ("(%(host)s, %(node)s) ram: %(free_ram)sMB "
-                "disk: %(free_disk)sMB io_ops: %(num_io_ops)s "
-                "instances: %(num_instances)s" %
-                {'host': self.host, 'node': self.nodename,
-                 'free_ram': self.free_ram_mb, 'free_disk': self.free_disk_mb,
-                 'num_io_ops': self.num_io_ops,
-                 'num_instances': self.num_instances})
+        return (
+            "(%(host)s, %(node)s) ram: %(free_ram)sMB "
+            "disk: %(free_disk)sMB io_ops: %(num_io_ops)s "
+            "instances: %(num_instances)s, "
+            "allocation_candidates: %(num_a_c)s"
+            % {
+                "host": self.host,
+                "node": self.nodename,
+                "free_ram": self.free_ram_mb,
+                "free_disk": self.free_disk_mb,
+                "num_io_ops": self.num_io_ops,
+                "num_instances": self.num_instances,
+                "num_a_c": len(self.allocation_candidates),
+            }
+        )


 class HostManager(object):

nova/scheduler/manager.py

Lines changed: 85 additions & 20 deletions

@@ -20,6 +20,7 @@
 """

 import collections
+import copy
 import random

 from oslo_log import log as logging
@@ -299,12 +300,29 @@ def _schedule(
         # host, we virtually consume resources on it so subsequent
         # selections can adjust accordingly.

+        def hosts_with_alloc_reqs(hosts_gen):
+            """Extend the HostState objects returned by the generator with
+            the allocation requests of that host.
+            """
+            for host in hosts_gen:
+                host.allocation_candidates = copy.deepcopy(
+                    alloc_reqs_by_rp_uuid[host.uuid])
+                yield host
+
         # Note: remember, we are using a generator-iterator here. So only
         # traverse this list once. This can bite you if the hosts
         # are being scanned in a filter or weighing function.
         hosts = self._get_all_host_states(
             elevated, spec_obj, provider_summaries)

+        # alloc_reqs_by_rp_uuid is None during rebuild, so this means we
+        # cannot run filters that use allocation candidates during rebuild.
+        if alloc_reqs_by_rp_uuid is not None:
+            # Wrap the generator to extend the HostState objects with the
+            # allocation requests for that given host. This is needed to
+            # support scheduler filters filtering on allocation candidates.
+            hosts = hosts_with_alloc_reqs(hosts)
+
         # NOTE(sbauza): The RequestSpec.num_instances field contains the number
         # of instances created when the RequestSpec was used to first boot some
         # instances. This is incorrect when doing a move or resize operation,
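
A self-contained toy version of the wrapping done above, useful to see why the deepcopy matters: every HostState gets its own copy of the candidates, so a filter that prunes one host's list cannot corrupt the shared alloc_reqs_by_rp_uuid map or another host's candidates. FakeHostState and the data below are stand-ins, not nova objects.

import copy


class FakeHostState:
    def __init__(self, uuid):
        self.uuid = uuid
        self.allocation_candidates = []


def hosts_with_alloc_reqs(hosts_gen, alloc_reqs_by_rp_uuid):
    # Same shape as the nested generator in the diff above.
    for host in hosts_gen:
        host.allocation_candidates = copy.deepcopy(
            alloc_reqs_by_rp_uuid[host.uuid])
        yield host


alloc_reqs_by_rp_uuid = {
    'rp-1': [{'allocations': {'rp-1': {'resources': {'VCPU': 1}}}}],
}
hosts = hosts_with_alloc_reqs(
    iter([FakeHostState('rp-1')]), alloc_reqs_by_rp_uuid)

host = next(hosts)
host.allocation_candidates.clear()    # a filter rejects every candidate
assert alloc_reqs_by_rp_uuid['rp-1']  # the shared map is left intact
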
@@ -332,6 +350,13 @@ def _schedule(
             # the older dict format representing HostState objects.
             # TODO(stephenfin): Remove this when we bump scheduler the RPC API
             # version to 5.0
+            # NOTE(gibi): We cannot remove this branch as it is actively used
+            # when nova calls the scheduler during rebuild (not evacuate) to
+            # check if the current host is still good for the new image used
+            # for the rebuild. In this case placement cannot be used to
+            # generate candidates as that would require space on the current
+            # compute for double allocation. So no allocation candidates for
+            # rebuild and therefore alloc_reqs_by_rp_uuid is None.
             return self._legacy_find_hosts(
                 context, num_instances, spec_obj, hosts, num_alts,
                 instance_uuids=instance_uuids)
@@ -345,6 +370,9 @@ def _schedule(
         # The list of hosts that have been selected (and claimed).
         claimed_hosts = []

+        # The allocation request allocated on the given claimed host
+        claimed_alloc_reqs = []
+
         for num, instance_uuid in enumerate(instance_uuids):
             # In a multi-create request, the first request spec from the list
             # is passed to the scheduler and that request spec's instance_uuid
@@ -371,21 +399,20 @@ def _schedule(
             # resource provider UUID
             claimed_host = None
             for host in hosts:
-                cn_uuid = host.uuid
-                if cn_uuid not in alloc_reqs_by_rp_uuid:
-                    msg = ("A host state with uuid = '%s' that did not have a "
-                           "matching allocation_request was encountered while "
-                           "scheduling. This host was skipped.")
-                    LOG.debug(msg, cn_uuid)
+                if not host.allocation_candidates:
+                    LOG.debug(
+                        "The nova scheduler removed every allocation "
+                        "candidate for host %s so this host was skipped.",
+                        host
+                    )
                     continue

-                alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid]
                 # TODO(jaypipes): Loop through all allocation_requests instead
                 # of just trying the first one. For now, since we'll likely
                 # want to order the allocation_requests in the future based on
                 # information in the provider summaries, we'll just try to
                 # claim resources using the first allocation_request
-                alloc_req = alloc_reqs[0]
+                alloc_req = host.allocation_candidates[0]
                 if utils.claim_resources(
                     elevated, self.placement_client, spec_obj, instance_uuid,
                     alloc_req,
@@ -405,6 +432,15 @@ def _schedule(

             claimed_instance_uuids.append(instance_uuid)
             claimed_hosts.append(claimed_host)
+            claimed_alloc_reqs.append(alloc_req)
+
+            # update the provider mapping in the request spec based
+            # on the allocated candidate as _consume_selected_host depends
+            # on this information to temporarily consume PCI devices tracked
+            # in placement
+            for request_group in spec_obj.requested_resources:
+                request_group.provider_uuids = alloc_req[
+                    'mappings'][request_group.requester_id]

             # Now consume the resources so the filter/weights will change for
             # the next instance.
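
For context on the new loop above: an allocation candidate returned by placement (microversion 1.34 or later) carries a 'mappings' dict from request group suffix to the resource provider UUIDs that satisfy that group, which is what gets copied into request_group.provider_uuids. The UUIDs and the group suffix below are made up for illustration.

# Illustrative candidate shape; real candidates come from the placement
# GET /allocation_candidates API.
alloc_req = {
    'allocations': {
        'compute-rp-uuid': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}},
        'pci-rp-uuid': {'resources': {'CUSTOM_PCI_DEVICE': 1}},
    },
    'mappings': {
        '': ['compute-rp-uuid'],   # the unsuffixed request group
        'pci-1': ['pci-rp-uuid'],  # a made-up suffixed group
    },
}

# What the loop in the diff does for one request group:
requester_id = 'pci-1'
provider_uuids = alloc_req['mappings'][requester_id]  # ['pci-rp-uuid']
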
@@ -416,11 +452,19 @@ def _schedule(
         self._ensure_sufficient_hosts(
             context, claimed_hosts, num_instances, claimed_instance_uuids)

-        # We have selected and claimed hosts for each instance. Now we need to
-        # find alternates for each host.
+        # We have selected and claimed hosts for each instance along with a
+        # claimed allocation request. Now we need to find alternates for each
+        # host.
         return self._get_alternate_hosts(
-            claimed_hosts, spec_obj, hosts, num, num_alts,
-            alloc_reqs_by_rp_uuid, allocation_request_version)
+            claimed_hosts,
+            spec_obj,
+            hosts,
+            num,
+            num_alts,
+            alloc_reqs_by_rp_uuid,
+            allocation_request_version,
+            claimed_alloc_reqs,
+        )

     def _ensure_sufficient_hosts(
         self, context, hosts, required_count, claimed_uuids=None,
@@ -532,7 +576,21 @@ def _consume_selected_host(selected_host, spec_obj, instance_uuid=None):
     def _get_alternate_hosts(
         self, selected_hosts, spec_obj, hosts, index, num_alts,
         alloc_reqs_by_rp_uuid=None, allocation_request_version=None,
+        selected_alloc_reqs=None,
     ):
+        """Generate the main Selection and possible alternate Selection
+        objects for each "instance".
+
+        :param selected_hosts: This is a list of HostState objects. Each
+            HostState represents the main selection for a given instance being
+            scheduled (we can have multiple instances during multi create).
+        :param selected_alloc_reqs: This is a list of allocation requests that
+            are already allocated in placement for the main Selection for each
+            instance. This list matches selected_hosts by index, so for the
+            first instance the selected host is selected_hosts[0] and the
+            already allocated placement candidate is selected_alloc_reqs[0].
+        """
         # We only need to filter/weigh the hosts again if we're dealing with
         # more than one instance and are going to be picking alternates.
         if index > 0 and num_alts > 0:
@@ -546,11 +604,10 @@ def _get_alternate_hosts(
         # representing the selected host along with alternates from the same
         # cell.
         selections_to_return = []
-        for selected_host in selected_hosts:
+        for i, selected_host in enumerate(selected_hosts):
             # This is the list of hosts for one particular instance.
             if alloc_reqs_by_rp_uuid:
-                selected_alloc_req = alloc_reqs_by_rp_uuid.get(
-                    selected_host.uuid)[0]
+                selected_alloc_req = selected_alloc_reqs[i]
             else:
                 selected_alloc_req = None

@@ -571,15 +628,17 @@ def _get_alternate_hosts(
                 if len(selected_plus_alts) >= num_alts + 1:
                     break

+                # TODO(gibi): In theory we could generate alternatives on the
+                # same host if that host has different possible allocation
+                # candidates for the request. But we don't do that today.
                 if host.cell_uuid == cell_uuid and host not in selected_hosts:
                     if alloc_reqs_by_rp_uuid is not None:
-                        alt_uuid = host.uuid
-                        if alt_uuid not in alloc_reqs_by_rp_uuid:
+                        if not host.allocation_candidates:
                             msg = ("A host state with uuid = '%s' that did "
-                                   "not have a matching allocation_request "
+                                   "not have any remaining allocation_request "
                                    "was encountered while scheduling. This "
                                    "host was skipped.")
-                            LOG.debug(msg, alt_uuid)
+                            LOG.debug(msg, host.uuid)
                             continue

                         # TODO(jaypipes): Loop through all allocation_requests
@@ -588,7 +647,13 @@
                         # the future based on information in the provider
                         # summaries, we'll just try to claim resources using
                         # the first allocation_request
-                        alloc_req = alloc_reqs_by_rp_uuid[alt_uuid][0]
+                        # NOTE(gibi): we are using, and re-using, allocation
+                        # candidates for alternatives here. This is OK as
+                        # these candidates are not yet allocated in placement
+                        # and we don't know if an alternate will ever be used.
+                        # To increase our success we could try to use a
+                        # different candidate for each alternative though.
+                        alloc_req = host.allocation_candidates[0]
                         alt_selection = objects.Selection.from_host_state(
                             host, alloc_req, allocation_request_version)
                     else: