 """

 import collections
+import copy
 import random

 from oslo_log import log as logging
@@ -299,12 +300,29 @@ def _schedule(
         # host, we virtually consume resources on it so subsequent
         # selections can adjust accordingly.

+        def hosts_with_alloc_reqs(hosts_gen):
+            """Extend the HostState objects returned by the generator with
+            the allocation requests of that host.
+            """
+            for host in hosts_gen:
+                host.allocation_candidates = copy.deepcopy(
+                    alloc_reqs_by_rp_uuid[host.uuid])
+                yield host
+
         # Note: remember, we are using a generator-iterator here. So only
         # traverse this list once. This can bite you if the hosts
         # are being scanned in a filter or weighing function.
         hosts = self._get_all_host_states(
             elevated, spec_obj, provider_summaries)

+        # alloc_reqs_by_rp_uuid is None during rebuild, so this means we
+        # cannot run filters that use allocation candidates during rebuild.
+        if alloc_reqs_by_rp_uuid is not None:
+            # Wrap the generator to extend the HostState objects with the
+            # allocation requests for the given host. This is needed to
+            # support scheduler filters filtering on allocation candidates.
+            hosts = hosts_with_alloc_reqs(hosts)
+
         # NOTE(sbauza): The RequestSpec.num_instances field contains the number
         # of instances created when the RequestSpec was used to first boot some
         # instances. This is incorrect when doing a move or resize operation,
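
For context, the wrapper added above is a standard generator-decoration
pattern: it annotates each host lazily as the scheduler iterates, and it
deep-copies the per-host candidate list so a filter mutating one host's
candidates cannot corrupt the shared alloc_reqs_by_rp_uuid mapping. A minimal
self-contained sketch, where HostLike and the candidate data are hypothetical
stand-ins for Nova's HostState objects and placement's responses:

    import copy

    class HostLike:  # hypothetical stand-in for a HostState
        def __init__(self, uuid):
            self.uuid = uuid
            self.allocation_candidates = None

    def with_candidates(hosts_gen, candidates_by_uuid):
        # Annotate each host lazily as the consumer iterates; deepcopy so
        # later mutation of one host's list cannot leak into the shared map.
        for host in hosts_gen:
            host.allocation_candidates = copy.deepcopy(
                candidates_by_uuid[host.uuid])
            yield host

    hosts = (HostLike(u) for u in ("rp-1", "rp-2"))
    candidates = {"rp-1": [{"allocations": {}}], "rp-2": [{"allocations": {}}]}
    for host in with_candidates(hosts, candidates):
        print(host.uuid, len(host.allocation_candidates))

Because the wrapper is itself a generator, the deepcopy cost is only paid for
hosts that are actually consumed from the iterator.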
@@ -332,6 +350,13 @@ def _schedule(
             # the older dict format representing HostState objects.
             # TODO(stephenfin): Remove this when we bump the scheduler RPC API
             # version to 5.0
+            # NOTE(gibi): We cannot remove this branch as it is actively used
+            # when nova calls the scheduler during rebuild (not evacuate) to
+            # check if the current host is still good for the new image used
+            # for the rebuild. In this case placement cannot be used to
+            # generate candidates as that would require space on the current
+            # compute for a double allocation. So there are no allocation
+            # candidates for rebuild, hence alloc_reqs_by_rp_uuid is None.
             return self._legacy_find_hosts(
                 context, num_instances, spec_obj, hosts, num_alts,
                 instance_uuids=instance_uuids)
@@ -345,6 +370,9 @@ def _schedule(
         # The list of hosts that have been selected (and claimed).
         claimed_hosts = []

+        # The allocation request claimed on each selected host, by index
+        claimed_alloc_reqs = []
+
         for num, instance_uuid in enumerate(instance_uuids):
             # In a multi-create request, the first request spec from the list
             # is passed to the scheduler and that request spec's instance_uuid
@@ -371,21 +399,20 @@ def _schedule(
             # resource provider UUID
             claimed_host = None
             for host in hosts:
-                cn_uuid = host.uuid
-                if cn_uuid not in alloc_reqs_by_rp_uuid:
-                    msg = ("A host state with uuid = '%s' that did not have a "
-                           "matching allocation_request was encountered while "
-                           "scheduling. This host was skipped.")
-                    LOG.debug(msg, cn_uuid)
+                if not host.allocation_candidates:
+                    LOG.debug(
+                        "The nova scheduler removed every allocation candidate "
+                        "for host %s so this host was skipped.",
+                        host
+                    )
                     continue

-                alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid]
                 # TODO(jaypipes): Loop through all allocation_requests instead
                 # of just trying the first one. For now, since we'll likely
                 # want to order the allocation_requests in the future based on
                 # information in the provider summaries, we'll just try to
                 # claim resources using the first allocation_request
-                alloc_req = alloc_reqs[0]
+                alloc_req = host.allocation_candidates[0]
                 if utils.claim_resources(
                     elevated, self.placement_client, spec_obj, instance_uuid,
                     alloc_req,
@@ -405,6 +432,15 @@ def _schedule(

             claimed_instance_uuids.append(instance_uuid)
             claimed_hosts.append(claimed_host)
+            claimed_alloc_reqs.append(alloc_req)
+
+            # Update the provider mapping in the request spec based on the
+            # allocated candidate, as _consume_selected_host depends on this
+            # information to temporarily consume PCI devices tracked in
+            # placement.
+            for request_group in spec_obj.requested_resources:
+                request_group.provider_uuids = alloc_req[
+                    'mappings'][request_group.requester_id]

             # Now consume the resources so the filter/weights will change for
             # the next instance.
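
The loop above relies on the 'mappings' key that placement returns with each
allocation candidate (added in placement microversion 1.34), relating each
request group's requester id to the provider UUIDs that satisfied it. A
hypothetical sketch of that bookkeeping, with stand-in data and a stand-in
RequestGroup class:

    # Hypothetical shape of one allocation candidate; real ones come from
    # placement's GET /allocation_candidates response.
    alloc_req = {
        "allocations": {"rp-uuid-1": {"resources": {"VCPU": 2}}},
        "mappings": {"_pci0": ["rp-uuid-1"]},
    }

    class RequestGroup:  # stand-in for the Nova RequestGroup object
        def __init__(self, requester_id):
            self.requester_id = requester_id
            self.provider_uuids = []

    groups = [RequestGroup("_pci0")]
    for group in groups:
        # mirrors the loop above: record which providers satisfied the group
        group.provider_uuids = alloc_req["mappings"][group.requester_id]

    print(groups[0].provider_uuids)  # ['rp-uuid-1']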
@@ -416,11 +452,19 @@ def _schedule(
         self._ensure_sufficient_hosts(
             context, claimed_hosts, num_instances, claimed_instance_uuids)

-        # We have selected and claimed hosts for each instance. Now we need to
-        # find alternates for each host.
+        # We have selected and claimed hosts for each instance along with a
+        # claimed allocation request. Now we need to find alternates for each
+        # host.
         return self._get_alternate_hosts(
-            claimed_hosts, spec_obj, hosts, num, num_alts,
-            alloc_reqs_by_rp_uuid, allocation_request_version)
+            claimed_hosts,
+            spec_obj,
+            hosts,
+            num,
+            num_alts,
+            alloc_reqs_by_rp_uuid,
+            allocation_request_version,
+            claimed_alloc_reqs,
+        )

     def _ensure_sufficient_hosts(
         self, context, hosts, required_count, claimed_uuids=None,
@@ -532,7 +576,21 @@ def _consume_selected_host(selected_host, spec_obj, instance_uuid=None):
     def _get_alternate_hosts(
         self, selected_hosts, spec_obj, hosts, index, num_alts,
         alloc_reqs_by_rp_uuid=None, allocation_request_version=None,
+        selected_alloc_reqs=None,
     ):
+        """Generate the main Selection and possible alternate Selection
+        objects for each "instance".
+
+        :param selected_hosts: This is a list of HostState objects. Each
+            HostState represents the main selection for a given instance
+            being scheduled (we can have multiple instances in multi create).
+        :param selected_alloc_reqs: This is a list of allocation requests
+            that are already allocated in placement for the main Selection
+            for each instance. This list matches selected_hosts by index. So
+            for the first instance the selected host is selected_hosts[0] and
+            the already allocated placement candidate is
+            selected_alloc_reqs[0].
+        """
         # We only need to filter/weigh the hosts again if we're dealing with
         # more than one instance and are going to be picking alternates.
         if index > 0 and num_alts > 0:
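
A hypothetical illustration of the parallel-list contract the new docstring
describes, using stand-in data:

    # Instance i was claimed on selected_hosts[i] via selected_alloc_reqs[i].
    selected_hosts = ["host-a", "host-b"]
    selected_alloc_reqs = [
        {"allocations": {"rp-a": {"resources": {"VCPU": 1}}}},
        {"allocations": {"rp-b": {"resources": {"VCPU": 1}}}},
    ]

    for host, alloc_req in zip(selected_hosts, selected_alloc_reqs):
        print(host, "->", list(alloc_req["allocations"]))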
@@ -546,11 +604,10 @@ def _get_alternate_hosts(
         # representing the selected host along with alternates from the same
         # cell.
         selections_to_return = []
-        for selected_host in selected_hosts:
+        for i, selected_host in enumerate(selected_hosts):
             # This is the list of hosts for one particular instance.
             if alloc_reqs_by_rp_uuid:
-                selected_alloc_req = alloc_reqs_by_rp_uuid.get(
-                    selected_host.uuid)[0]
+                selected_alloc_req = selected_alloc_reqs[i]
             else:
                 selected_alloc_req = None

@@ -571,15 +628,17 @@ def _get_alternate_hosts(
                 if len(selected_plus_alts) >= num_alts + 1:
                     break

+                # TODO(gibi): In theory we could generate alternatives on the
+                # same host if that host has different possible allocation
+                # candidates for the request. But we don't do that today.
                 if host.cell_uuid == cell_uuid and host not in selected_hosts:
                     if alloc_reqs_by_rp_uuid is not None:
-                        alt_uuid = host.uuid
-                        if alt_uuid not in alloc_reqs_by_rp_uuid:
+                        if not host.allocation_candidates:
                             msg = ("A host state with uuid = '%s' that did "
-                                   "not have a matching allocation_request "
+                                   "not have any remaining allocation_request "
                                    "was encountered while scheduling. This "
                                    "host was skipped.")
-                            LOG.debug(msg, alt_uuid)
+                            LOG.debug(msg, host.uuid)
                             continue

                         # TODO(jaypipes): Loop through all allocation_requests
@@ -588,7 +647,13 @@ def _get_alternate_hosts(
                         # the future based on information in the provider
                         # summaries, we'll just try to claim resources using
                         # the first allocation_request
-                        alloc_req = alloc_reqs_by_rp_uuid[alt_uuid][0]
+                        # NOTE(gibi): We are using, and re-using, allocation
+                        # candidates for alternatives here. This is OK as
+                        # these candidates are not yet allocated in placement
+                        # and we don't know if an alternate will ever be used.
+                        # To increase our success we could try to use a
+                        # different candidate for each alternative, though.
+                        alloc_req = host.allocation_candidates[0]
                         alt_selection = objects.Selection.from_host_state(
                             host, alloc_req, allocation_request_version)
                     else:
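
The NOTE above leaves room for a refinement: instead of always reusing
candidates[0] for every alternate built on a host, the alternates could
rotate through the host's remaining candidates. A hypothetical sketch of
that idea with stand-in data:

    import itertools

    # Stand-in candidate list; real entries are placement allocation requests.
    candidates = [{"id": 1}, {"id": 2}, {"id": 3}]

    # Cycle so successive alternates built on the same host do not all pin
    # the same candidate.
    picker = itertools.cycle(candidates)
    alternates = [next(picker) for _ in range(5)]
    print([c["id"] for c in alternates])  # [1, 2, 3, 1, 2]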