# Copyright 2022 StackHPC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


import os_resource_classes as orc
from oslo_limit import exception as limit_exceptions
from oslo_limit import limit
from oslo_log import log as logging

import nova.conf
from nova import exception
from nova.limit import utils as limit_utils
from nova import objects
from nova import quota
from nova.scheduler.client import report
from nova.scheduler import utils

LOG = logging.getLogger(__name__)
CONF = nova.conf.CONF

# Cache to avoid repopulating ksa state
PLACEMENT_CLIENT = None


def _get_placement_usages(context, project_id):
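    """Fetch usage counts for limited resources from placement.

    The report client is cached in a module-level global so its
    keystoneauth state is only built once per process.
    """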
    global PLACEMENT_CLIENT
    if not PLACEMENT_CLIENT:
        PLACEMENT_CLIENT = report.SchedulerReportClient()
    return PLACEMENT_CLIENT.get_usages_counts_for_limits(context, project_id)


def _get_usage(context, project_id, resource_names):
| 44 | + """Called by oslo_limit's enforcer""" |
    if not limit_utils.use_unified_limits():
        raise NotImplementedError("unified limits is disabled")

    count_servers = False
    resource_classes = []

    for resource in resource_names:
        if resource == "servers":
            count_servers = True
            continue

        if not resource.startswith("class:"):
            raise ValueError("Unknown resource type: %s" % resource)

        # Temporarily strip the resource class prefix, as placement does not
        # use it. Example: limit resource 'class:VCPU' is returned as 'VCPU'
        # from placement. Slice rather than use str.lstrip(): lstrip() strips
        # a character set, not a prefix.
        r_class = resource[len("class:"):]
        if r_class in orc.STANDARDS or orc.is_custom(r_class):
            resource_classes.append(r_class)
        else:
            raise ValueError("Unknown resource class: %s" % r_class)

    if not count_servers and len(resource_classes) == 0:
        raise ValueError("no resources to check")

    resource_counts = {}
    if count_servers:
        # TODO(melwitt): Change this to count servers from placement once nova
        # is using placement consumer types and is able to differentiate
        # between "instance" allocations vs "migration" allocations.
        if not quota.is_qfd_populated(context):
            LOG.error('Must migrate all instance mappings before using '
                      'unified limits')
            raise ValueError("must first migrate instance mappings")
        mappings = objects.InstanceMappingList.get_counts(context, project_id)
        resource_counts['servers'] = mappings['project']['instances']

    try:
        usages = _get_placement_usages(context, project_id)
    except exception.UsagesRetrievalFailed as e:
        msg = ("Failed to retrieve usages from placement while enforcing "
               "%s quota limits." % ", ".join(resource_names))
        LOG.error("%s Error: %s", msg, e)
        raise exception.UsagesRetrievalFailed(msg)

    # Use legacy behavior VCPU = VCPU + PCPU if configured.
    if CONF.workarounds.unified_limits_count_pcpu_as_vcpu:
        # If PCPU is in resource_classes, that means it was specified in the
        # flavor explicitly. In that case, we expect it to have its own limit
        # registered and we should not fold it into VCPU.
        if orc.PCPU in usages and orc.PCPU not in resource_classes:
            usages[orc.VCPU] = (usages.get(orc.VCPU, 0) +
                                usages.get(orc.PCPU, 0))

    for resource_class in resource_classes:
        # Need to add back the resource class prefix that was stripped earlier
        resource_name = 'class:' + resource_class
        # Placement doesn't know about classes with zero usage,
        # so default to zero to tell oslo.limit the usage is zero
        resource_counts[resource_name] = usages.get(resource_class, 0)

    return resource_counts


def _get_deltas_by_flavor(flavor, is_bfv, count):
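    """Build the oslo.limit deltas dict for launching count instances.

    Returns a dict keyed like the registered limits, e.g.
    {"servers": count, "class:VCPU": ..., "class:MEMORY_MB": ...},
    with each per-flavor resource amount multiplied by count.
    """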
    if flavor is None:
        raise ValueError("flavor must not be None")
    if count < 0:
        raise ValueError("count must not be negative")

    # NOTE(johngarbutt): this skips bfv, port, and cyborg resources,
    # but it still gives us better checks than before unified limits.
    # We need an instance in the DB to use the current is_bfv logic,
    # which doesn't work well for instances that don't yet have a uuid.
    deltas_from_flavor = utils.resources_for_limits(flavor, is_bfv)

    deltas = {"servers": count}
    for resource, amount in deltas_from_flavor.items():
        if amount != 0:
            deltas["class:%s" % resource] = amount * count
    return deltas


def _get_enforcer(context, project_id):
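    """Return an oslo.limit Enforcer whose usage callback is bound to context.

    oslo.limit only passes (project_id, resource_names) to the callback,
    so the RequestContext is captured in a closure here.
    """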
    # NOTE(johngarbutt) should we move context arg into oslo.limit?
    def callback(project_id, resource_names):
        return _get_usage(context, project_id, resource_names)

    return limit.Enforcer(callback)


def enforce_num_instances_and_flavor(context, project_id, flavor, is_bfvm,
                                     min_count, max_count, enforcer=None):
    """Return max instances possible, else raise TooManyInstances exception."""
    if not limit_utils.use_unified_limits():
        return max_count

    # Ensure the recursion will always complete
    if min_count < 0 or min_count > max_count:
        raise ValueError("invalid min_count")
    if max_count < 0:
        raise ValueError("invalid max_count")

    deltas = _get_deltas_by_flavor(flavor, is_bfvm, max_count)
    # Reuse any enforcer passed down through the recursion so the usage
    # callback's client state is only built once per request.
    enforcer = enforcer or _get_enforcer(context, project_id)
    try:
        enforcer.enforce(project_id, deltas)
    except limit_exceptions.ProjectOverLimit as e:
        # NOTE(johngarbutt) we can do better, but this is very simple
        LOG.debug("Limit check failed with count %s, retrying with count %s",
                  max_count, max_count - 1)
        try:
            return enforce_num_instances_and_flavor(context, project_id,
                                                    flavor, is_bfvm, min_count,
                                                    max_count - 1,
                                                    enforcer=enforcer)
        except ValueError:
            # Copy the *original* over-limit message into the
            # TooManyInstances raised to the API layer.
            raise exception.TooManyInstances(str(e))

    # no problems with max_count, so return max_count
    return max_count
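
# A minimal usage sketch (hypothetical caller, not part of this module): the
# compute API would validate a boot request with something like
#
#   allowed = enforce_num_instances_and_flavor(
#       context, "my-project", flavor, is_bfvm=False,
#       min_count=1, max_count=requested_count)
#
# and then create at most `allowed` instances.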