Skip to content

Commit 3f8eb62

Browse files
committed
Shard affinity for k8s workloads
Instances that are part of the same K8S cluster will get scheduled to the same shard (vCenter). The K8S cluster is identified by the tags or metadata set by the cluster orchestrators when creating the instances; Kubernikus and Gardener are supported for now. BigVMs are exempt from shard affinity: they are only scheduled on their allocated hosts. Change-Id: I73d04ba295d23db1d4728e9db124fc2a27c2d4bc
1 parent fe8d4cf commit 3f8eb62

File tree

3 files changed

+481
-47
lines changed

3 files changed

+481
-47
lines changed

nova/db/main/api.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2087,6 +2087,70 @@ def instance_get_active_by_window_joined(context, begin, end=None,
20872087
return _instances_fill_metadata(context, query.all(), manual_joins)
20882088

20892089

2090+
@require_context
@pick_context_manager_reader_allow_async
def get_k8s_hosts_by_instances_tag(context, tag, filters=None):
    """Get the list of K8S hosts querying by the instances tags.

    Returns (host, count) rows -- one per host running non-deleted
    instances carrying the given tag -- ordered by count descending.

    :param context: The security context
    :param tag: The instance tag identifying the K8S cluster
    :param filters: Optional filters, see
        _handle_k8s_hosts_query_filters() for the supported keys
    """
    count_label = func.count('*').label('count')
    # Select only the host column (not the whole Instance entity) so the
    # GROUP BY host is well-formed and the result shape matches
    # get_k8s_hosts_by_instances_metadata().
    query = context.session.query(models.Instance.host, count_label). \
        join(models.Instance.tags)
    query = _handle_k8s_hosts_query_filters(query, filters)
    query = query.filter(models.Instance.deleted == 0,
                         models.Tag.tag == tag)

    query = query.group_by(models.Instance.host). \
        order_by(sql.desc(count_label))

    return query.all()
2105+
2106+
2107+
@require_context
@pick_context_manager_reader_allow_async
def get_k8s_hosts_by_instances_metadata(context, meta_key, meta_value,
                                        filters=None):
    """Get the list of K8S hosts querying by the instances metadata.

    Returns (host, count) rows -- one per host running non-deleted
    instances with the given metadata key/value pair -- ordered by
    count descending.

    :param context: The security context
    :param meta_key: The metadata key identifying the K8S cluster
    :param meta_value: The metadata value identifying the K8S cluster
    :param filters: Optional filters, see
        _handle_k8s_hosts_query_filters() for the supported keys
    """
    instances_per_host = func.count('*').label('count')
    query = (context.session.query(models.Instance.host, instances_per_host)
             .join(models.InstanceMetadata,
                   models.InstanceMetadata.instance_uuid ==
                   models.Instance.uuid))
    query = _handle_k8s_hosts_query_filters(query, filters)
    query = query.filter(models.Instance.deleted == 0,
                         models.InstanceMetadata.deleted == 0,
                         models.InstanceMetadata.key == meta_key,
                         models.InstanceMetadata.value == meta_value)
    query = (query.group_by(models.Instance.host)
             .order_by(sql.desc(instances_per_host)))

    return query.all()
2125+
2126+
2127+
def _handle_k8s_hosts_query_filters(query, filters=None):
    """Apply optional filters to the K8S host queries.

    Supported filters:
    filters = {
        'hv_type': 'The hypervisor_type',
        'availability_zone': 'The availability zone'
    }

    Returns the (possibly unchanged) query.
    """
    if not filters:
        return query

    wanted_hv_type = filters.get('hv_type')
    wanted_az = filters.get('availability_zone')

    if wanted_hv_type:
        # Join ComputeNode up front; the hv_type predicates themselves
        # are appended after the availability-zone filter, preserving
        # the original clause order.
        query = query.join(
            models.ComputeNode,
            models.Instance.node == models.ComputeNode.hypervisor_hostname)

    if wanted_az:
        query = query.filter(
            models.Instance.availability_zone == wanted_az)
    if wanted_hv_type:
        query = query.filter(
            models.ComputeNode.deleted == 0,
            models.ComputeNode.hypervisor_type == wanted_hv_type)
    return query
2152+
2153+
20902154
def _instance_get_all_query(context, project_only=False, joins=None):
20912155
if joins is None:
20922156
joins = ['info_cache', 'security_groups']

nova/scheduler/filters/shard_filter.py

Lines changed: 108 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
from oslo_log import log as logging
2020

2121
import nova.conf
22+
from nova import context as nova_context
23+
from nova.db.main import api as main_db_api
24+
from nova.objects.aggregate import AggregateList
25+
from nova.objects.build_request import BuildRequest
26+
from nova.objects.instance import Instance
2227
from nova.scheduler import filters
2328
from nova.scheduler import utils
2429
from nova import utils as nova_utils
@@ -28,6 +33,10 @@
2833
CONF = nova.conf.CONF
2934

3035
_SERVICE_AUTH = None
36+
GARDENER_PREFIX = "kubernetes.io-cluster-"
37+
KKS_PREFIX = "kubernikus:kluster"
38+
HANA_PREFIX = "hana_"
39+
VMWARE_HV_TYPE = 'VMware vCenter Server'
3140

3241

3342
class ShardFilter(filters.BaseHostFilter):
@@ -37,6 +46,8 @@ class ShardFilter(filters.BaseHostFilter):
3746
3847
Alternatively the project may have the "sharding_enabled" tag set, which
3948
enables the project for hosts in all shards.
49+
50+
Implements `filter_all` directly instead of `host_passes`
4051
"""
4152

4253
_PROJECT_SHARD_CACHE = {}
@@ -114,11 +125,88 @@ def _get_shards(self, project_id):
114125

115126
return self._PROJECT_SHARD_CACHE.get(project_id)
116127

117-
def host_passes(self, host_state, spec_obj):
128+
def _get_k8s_shard(self, spec_obj):
    """Return the dominant shard of a K8S cluster.

    The dominant shard is the shard aggregate whose hosts overlap the
    most with the hosts already running instances of the same K8S
    cluster.

    Returns None if the request is not for an instance that's part of
    a K8S cluster, if this is the first instance of a new cluster, or
    if no shard aggregates exist.
    """
    # HANA flavors and resize requests are excluded from K8S shard
    # affinity.
    if (spec_obj.flavor.name.startswith(HANA_PREFIX) or
            utils.request_is_resize(spec_obj)):
        return None
    elevated = nova_context.get_admin_context()
    build_request = None
    instance = None

    def _get_tags():
        return build_request.tags if build_request \
            else instance.tags

    def _get_metadata():
        return build_request.instance.metadata if build_request \
            else instance.metadata

    # During initial scheduling only the BuildRequest exists; on
    # re-scheduling checks the Instance record is used instead.
    check_type = spec_obj.get_scheduler_hint('_nova_check_type')
    if not check_type:
        build_request = BuildRequest.get_by_instance_uuid(
            elevated, spec_obj.instance_uuid)
    else:
        instance = Instance.get_by_uuid(
            elevated, spec_obj.instance_uuid,
            expected_attrs=['tags', 'metadata'])

    # Kubernikus marks cluster membership with an instance tag,
    # Gardener with instance metadata.
    kks_tag = next((t.tag for t in _get_tags()
                    if t.tag.startswith(KKS_PREFIX)), None)
    gardener_meta = None
    if not kks_tag:
        gardener_meta = \
            {k: v for k, v in _get_metadata().items()
             if k.startswith(GARDENER_PREFIX)}

    if not kks_tag and not gardener_meta:
        return None

    q_filters = {'hv_type': VMWARE_HV_TYPE}
    if spec_obj.availability_zone:
        q_filters['availability_zone'] = spec_obj.availability_zone

    results = {}
    if kks_tag:
        results = nova_context.scatter_gather_skip_cell0(
            elevated, main_db_api.get_k8s_hosts_by_instances_tag,
            kks_tag, filters=q_filters)
    elif gardener_meta:
        (meta_key, meta_value) = next(iter(gardener_meta.items()))
        results = nova_context.scatter_gather_skip_cell0(
            elevated, main_db_api.get_k8s_hosts_by_instances_metadata,
            meta_key, meta_value, filters=q_filters)

    # scatter_gather_skip_cell0() returns {cell_uuid: result}. The
    # previous code built set(results), i.e. a set of *cell UUIDs*,
    # never host names. Flatten the per-cell (host, count) rows into a
    # set of hosts instead. NOTE(review): a cell's result may also be
    # a did-not-respond sentinel or an exception -- only consume list
    # results; confirm sentinel handling against nova_context.
    k8s_hosts = {row[0] for rows in results.values()
                 if isinstance(rows, list)
                 for row in rows}
    if not k8s_hosts:
        return None

    all_shard_aggrs = [agg for agg in AggregateList.get_all(elevated)
                       if agg.name.startswith(self._SHARD_PREFIX)]
    if not all_shard_aggrs:
        return None

    # Pick the shard overlapping the *most* K8S hosts. The previous
    # ascending sort + [0] selected the least-overlapping shard.
    shard_aggr = max(all_shard_aggrs,
                     key=lambda aggr: len(set(aggr.hosts) & k8s_hosts))
    return shard_aggr.name
196+
197+
def filter_all(self, filter_obj_list, spec_obj):
    """Filter all candidate hosts in one pass.

    Implemented instead of host_passes() so the K8S dominant shard is
    computed once per request and reused for every host.
    """
    # Only VMware
    if utils.is_non_vmware_spec(spec_obj):
        LOG.debug("ShardFilter is not applicable in this non-vmware "
                  "request")
        return filter_obj_list

    k8s_shard = self._get_k8s_shard(spec_obj)

    passing = []
    for candidate in filter_obj_list:
        if self._host_passes(candidate, spec_obj, k8s_shard):
            passing.append(candidate)
    return passing
208+
209+
def _host_passes(self, host_state, spec_obj, k8s_shard):
122210
host_shard_aggrs = [aggr for aggr in host_state.aggregates
123211
if aggr.name.startswith(self._SHARD_PREFIX)]
124212

@@ -148,18 +236,34 @@ def host_passes(self, host_state, spec_obj):
148236
if self._ALL_SHARDS in shards:
149237
LOG.debug('project enabled for all shards %(project_shards)s.',
150238
{'project_shards': shards})
151-
return True
152239
elif host_shard_names & set(shards):
153240
LOG.debug('%(host_state)s shard %(host_shard)s found in project '
154241
'shards %(project_shards)s.',
155242
{'host_state': host_state,
156243
'host_shard': host_shard_names,
157244
'project_shards': shards})
158-
return True
159245
else:
160246
LOG.debug('%(host_state)s shard %(host_shard)s not found in '
161247
'project shards %(project_shards)s.',
162248
{'host_state': host_state,
163249
'host_shard': host_shard_names,
164250
'project_shards': shards})
165251
return False
252+
253+
if k8s_shard:
254+
matches = any(host_shard == k8s_shard
255+
for host_shard in host_shard_names)
256+
if not matches:
257+
LOG.debug("%(host_state)s is not part of the requested "
258+
"K8S cluster.")
259+
return matches
260+
261+
return True
262+
263+
def _host_passes_k8s(self, host_shard_aggrs, k8s_hosts):
264+
"""Instances of a K8S cluster must end up on the same shard.
265+
The K8S cluster is identified by the metadata or tags set
266+
by the orchestrator (Gardener or Kubernikus).
267+
"""
268+
return any(set(aggr.hosts) & k8s_hosts
269+
for aggr in host_shard_aggrs)

0 commit comments

Comments
 (0)