Skip to content

Commit 1dbe424

Browse files
committed
Enhanced external port scheduling
This patch introduces a new configuration for OVN CMS Options called "enable-chassis-as-extport-host". This configuration can be used by ML2/OVN to identify nodes that are eligible for scheduling OVN's external ports. Prior to this patch, external ports were always scheduled on centralized networked nodes tagged with the "enable-chassis-as-gw" flag in the OVN CMS Options. However, when deploying OpenStack on OpenShift, requiring services such as the OVN Metadata Agent or DHCP Agent to serve those external ports and running them on control plane nodes is not ideal. This is where this patch comes in handy, allowing more flexibility in where these ports are scheduled. The patch is also backward compatible: if the new configuration is not present in the OVN CMS Options, ML2/OVN will continue to schedule the external ports on nodes configured with the previous configuration, as before. Documentation will be updated in a follow-up patch. Conflicts: neutron/common/ovn/utils.py neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/impl_idl_ovn.py neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py neutron/tests/unit/plugins/ml2/drivers/ovn/mech_driver/test_mech_driver.py Closes-Bug: 2037294 Change-Id: Ic46d847e3aebfe543d5a7ab49d18d1f1abf1342e Signed-off-by: Lucas Alvares Gomes <[email protected]> (cherry picked from commit 770914f)
1 parent 6205158 commit 1dbe424

File tree

14 files changed

+613
-139
lines changed

14 files changed

+613
-139
lines changed

neutron/common/ovn/constants.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969

7070
OVN_PROVNET_PORT_NAME_PREFIX = 'provnet-'
7171
OVN_NAME_PREFIX = 'neutron-'
72+
OVN_HA_CH_GROUP_EXTPORT_PREFIX = 'neutron-extport-'
7273

7374
# TODO(froyo): Move this to neutron-lib as soon as possible, and when a new
7475
# release is created and pointed to in the requirements remove this code
@@ -307,6 +308,12 @@
307308
# Maximum chassis count where a gateway port can be hosted
308309
MAX_GW_CHASSIS = 5
309310

311+
# Maximum number of Chassis in a HA Chassis Group. Limiting the number
312+
# of members because OVN uses BFD to monitor the connectivity of each member
313+
# in the group. Having an unlimited number of members can potentially
314+
# put a lot of stress on OVN to monitor it all.
315+
MAX_CHASSIS_IN_HA_GROUP = 5
316+
310317
UNKNOWN_ADDR = 'unknown'
311318

312319
PORT_CAP_SWITCHDEV = 'switchdev'
@@ -420,6 +427,7 @@
420427
NEUTRON_AVAILABILITY_ZONES = 'neutron-availability-zones'
421428
OVN_CMS_OPTIONS = 'ovn-cms-options'
422429
CMS_OPT_CHASSIS_AS_GW = 'enable-chassis-as-gw'
430+
CMS_OPT_CHASSIS_AS_EXTPORT_HOST = 'enable-chassis-as-extport-host'
423431
CMS_OPT_AVAILABILITY_ZONES = 'availability-zones'
424432
CMS_OPT_CARD_SERIAL_NUMBER = 'card-serial-number'
425433

neutron/common/ovn/utils.py

Lines changed: 141 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
from oslo_serialization import jsonutils
3636
from oslo_utils import netutils
3737
from oslo_utils import strutils
38-
from ovsdbapp.backend.ovs_idl import rowview
38+
from ovsdbapp.backend.ovs_idl import idlutils
3939
from ovsdbapp import constants as ovsdbapp_const
4040
import tenacity
4141

@@ -61,6 +61,10 @@
6161
BPInfo = collections.namedtuple(
6262
'BPInfo', ['bp_param', 'vnic_type', 'capabilities'])
6363

64+
HAChassisGroupInfo = collections.namedtuple(
65+
'HAChassisGroupInfo', ['group_name', 'chassis_list', 'az_hints',
66+
'ignore_chassis'])
67+
6468

6569
class OvsdbClientCommand(object):
6670
_CONNECTION = 0
@@ -159,6 +163,11 @@ def ovn_provnet_port_name(network_id):
159163
return constants.OVN_PROVNET_PORT_NAME_PREFIX + '%s' % network_id
160164

161165

166+
def ovn_extport_chassis_group_name(port_id):
167+
# The name of the HA Chassis Group entry will be neutron-extport-<UUID>
168+
return constants.OVN_HA_CH_GROUP_EXTPORT_PREFIX + '%s' % port_id
169+
170+
162171
def ovn_vhu_sockpath(sock_dir, port_id):
163172
# Frame the socket path of a virtio socket
164173
return os.path.join(
@@ -671,6 +680,12 @@ def is_gateway_chassis(chassis):
671680
return constants.CMS_OPT_CHASSIS_AS_GW in get_ovn_cms_options(chassis)
672681

673682

683+
def is_extport_host_chassis(chassis):
684+
"""Check if the given Chassis is marked to host external ports"""
685+
return (constants.CMS_OPT_CHASSIS_AS_EXTPORT_HOST in
686+
get_ovn_cms_options(chassis))
687+
688+
674689
def get_port_capabilities(port):
675690
"""Return a list of port's capabilities"""
676691
return port.get(portbindings.PROFILE, {}).get(constants.PORT_CAP_PARAM, [])
@@ -724,7 +739,7 @@ def get_chassis_in_azs(chassis_list, az_list):
724739
return chassis
725740

726741

727-
def get_gateway_chassis_without_azs(chassis_list):
742+
def get_chassis_without_azs(chassis_list):
728743
"""Return a set of Chassis that does not belong to any AZs.
729744
730745
Filter a list of Chassis and return only the Chassis that does not
@@ -733,7 +748,7 @@ def get_gateway_chassis_without_azs(chassis_list):
733748
:param chassis_list: A list of Chassis objects
734749
:returns: A set of Chassis names
735750
"""
736-
return {ch.name for ch in chassis_list if is_gateway_chassis(ch) and not
751+
return {ch.name for ch in chassis_list if not
737752
get_chassis_availability_zones(ch)}
738753

739754

@@ -862,77 +877,150 @@ def get_ovn_chassis_other_config(chassis):
862877
return chassis.external_ids
863878

864879

865-
def sync_ha_chassis_group(context, network_id, nb_idl, sb_idl, txn):
866-
"""Return the UUID of the HA Chassis Group or the HA Chassis Group cmd.
867-
868-
Given the Neutron Network ID, this method will return (or create
869-
and then return) the appropriate HA Chassis Group the external
870-
port (in that network) needs to be associated with.
880+
def _get_info_for_ha_chassis_group(context, port_id, network_id, sb_idl):
881+
"""Get the common required information to create a HA Chassis Group.
871882
872-
:param context: Neutron API context.
873-
:param network_id: The Neutron network ID.
874-
:param nb_idl: OVN NB IDL
875-
:param sb_idl: OVN SB IDL
876-
:param txn: The ovsdbapp transaction object.
877-
:returns: The HA Chassis Group UUID or the HA Chassis Group command object.
883+
:param context: Neutron API context
884+
:param port_id: The port ID
885+
:param network_id: The network ID
886+
:param sb_idl: OVN SB IDL
887+
:returns: An instance of HAChassisGroupInfo
878888
"""
889+
ignore_chassis = set()
890+
# If there are Chassis marked for hosting external ports create a HA
891+
# Chassis Group per external port, otherwise do it at the network level
892+
chassis_list = sb_idl.get_extport_chassis_from_cms_options()
893+
if chassis_list:
894+
group_name = ovn_extport_chassis_group_name(port_id)
895+
# Check if the port is bound to a chassis and if so, ignore that
896+
# chassis when building the HA Chassis Group to ensure the
897+
# external port is bound to a different chassis than the VM
898+
ignore_chassis = sb_idl.get_chassis_host_for_port(port_id)
899+
LOG.debug('HA Chassis Group %s is based on external port %s '
900+
'(network %s)', group_name, port_id, network_id)
901+
else:
902+
chassis_list = sb_idl.get_gateway_chassis_from_cms_options(
903+
name_only=False)
904+
group_name = ovn_name(network_id)
905+
LOG.debug('HA Chassis Group %s is based on network %s',
906+
group_name, network_id)
907+
908+
# Get the Availability Zones hints
879909
plugin = directory.get_plugin()
880910
az_hints = common_utils.get_az_hints(
881911
plugin.get_network(context, network_id))
882912

883-
ha_ch_grp_name = ovn_name(network_id)
884-
ext_ids = {constants.OVN_AZ_HINTS_EXT_ID_KEY: ','.join(az_hints)}
885-
hcg_cmd = txn.add(nb_idl.ha_chassis_group_add(
886-
ha_ch_grp_name, may_exist=True, external_ids=ext_ids))
913+
return HAChassisGroupInfo(
914+
group_name=group_name, chassis_list=chassis_list, az_hints=az_hints,
915+
ignore_chassis=ignore_chassis)
887916

888-
if isinstance(hcg_cmd.result, rowview.RowView):
889-
# The HA chassis group existed before this transaction.
890-
ha_ch_grp = hcg_cmd.result
891-
else:
892-
# The HA chassis group is being created in this transaction.
893-
ha_ch_grp = None
894917

895-
# Get the chassis belonging to the AZ hints
896-
ch_list = sb_idl.get_gateway_chassis_from_cms_options(name_only=False)
897-
if not az_hints:
898-
az_chassis = get_gateway_chassis_without_azs(ch_list)
918+
def _filter_candidates_for_ha_chassis_group(hcg_info):
919+
"""Filter a list of chassis candidates for a given HA Chassis Group.
920+
921+
Filter a list of chassis candidates for a given HA Chassis Group taking
922+
into consideration availability zones, if present.
923+
924+
:param hcg_info: An instance of HAChassisGroupInfo
925+
:returns: A list of chassis
926+
"""
927+
if hcg_info.az_hints:
928+
candidates = get_chassis_in_azs(hcg_info.chassis_list,
929+
hcg_info.az_hints)
930+
LOG.debug('Taking in consideration the AZs "%s" for HA '
931+
'Chassis Group %s', ','.join(hcg_info.az_hints),
932+
hcg_info.group_name)
899933
else:
900-
az_chassis = get_chassis_in_azs(ch_list, az_hints)
934+
candidates = get_chassis_without_azs(hcg_info.chassis_list)
901935

936+
# Remove the ignored Chassis, if present
937+
if hcg_info.ignore_chassis:
938+
LOG.debug('Ignoring chassis %s for HA Chassis Group %s',
939+
', '.join(hcg_info.ignore_chassis), hcg_info.group_name)
940+
candidates = candidates - hcg_info.ignore_chassis
941+
942+
return candidates
943+
944+
945+
def sync_ha_chassis_group(context, port_id, network_id, nb_idl, sb_idl, txn):
946+
"""Return the UUID of the HA Chassis Group or the HA Chassis Group cmd.
947+
948+
Given the Neutron Network ID, this method will return (or create
949+
and then return) the appropriate HA Chassis Group the external
950+
port (in that network) needs to be associated with.
951+
952+
:param context: Neutron API context
953+
:param port_id: The port ID
954+
:param network_id: The network ID
955+
:param nb_idl: OVN NB IDL
956+
:param sb_idl: OVN SB IDL
957+
:param txn: The ovsdbapp transaction object
958+
:returns: The HA Chassis Group UUID or the HA Chassis Group command object
959+
"""
960+
# If there are Chassis marked for hosting external ports create a HA
961+
# Chassis Group per external port, otherwise do it at the network level
962+
hcg_info = _get_info_for_ha_chassis_group(context, port_id, network_id,
963+
sb_idl)
964+
candidates = _filter_candidates_for_ha_chassis_group(hcg_info)
965+
966+
# Try to get the HA Chassis Group or create if it doesn't exist
967+
ha_ch_grp = ha_ch_grp_cmd = None
968+
try:
969+
ha_ch_grp = nb_idl.ha_chassis_group_get(
970+
hcg_info.group_name).execute(check_error=True)
971+
except idlutils.RowNotFound:
972+
ext_ids = {constants.OVN_AZ_HINTS_EXT_ID_KEY: ','.join(
973+
hcg_info.az_hints)}
974+
ha_ch_grp_cmd = txn.add(nb_idl.ha_chassis_group_add(
975+
hcg_info.group_name, may_exist=True, external_ids=ext_ids))
976+
977+
max_chassis_number = constants.MAX_CHASSIS_IN_HA_GROUP
902978
priority = constants.HA_CHASSIS_GROUP_HIGHEST_PRIORITY
979+
980+
# Check if the HA Chassis Group existed before. If so, re-calculate
981+
# the candidates in case something changed and keep the highest priority
982+
# chassis in the group (if it's an eligible candidate) with the highest
983+
# priority to avoid external ports from moving around
903984
if ha_ch_grp:
904985
# Remove any chassis that no longer belongs to the AZ hints
986+
# or is ignored
905987
all_ch = {ch.chassis_name for ch in ha_ch_grp.ha_chassis}
906-
ch_to_del = all_ch - az_chassis
988+
ch_to_del = all_ch - candidates
907989
for ch in ch_to_del:
908990
txn.add(nb_idl.ha_chassis_group_del_chassis(
909-
ha_ch_grp_name, ch, if_exists=True))
991+
hcg_info.group_name, ch, if_exists=True))
910992

911-
# Find the highest priority chassis in the HA Chassis Group. If
912-
# it exists and still belongs to the same AZ, keep it as the
913-
# highest priority in the group to avoid ports already bond to it
914-
# from moving to another chassis.
993+
# Find the highest priority chassis in the HA Chassis Group
915994
high_prio_ch = max(ha_ch_grp.ha_chassis, key=lambda x: x.priority,
916995
default=None)
917-
priority = constants.HA_CHASSIS_GROUP_HIGHEST_PRIORITY
918-
if high_prio_ch and high_prio_ch.chassis_name in az_chassis:
996+
if (high_prio_ch and
997+
high_prio_ch.chassis_name in candidates):
998+
# If found, keep it as the highest priority chassis in the group
919999
txn.add(nb_idl.ha_chassis_group_add_chassis(
920-
ha_ch_grp_name, high_prio_ch.chassis_name,
1000+
hcg_info.group_name, high_prio_ch.chassis_name,
9211001
priority=priority))
922-
az_chassis.remove(high_prio_ch.chassis_name)
1002+
candidates.remove(high_prio_ch.chassis_name)
9231003
priority -= 1
924-
925-
# Randomize the order so that networks belonging to the same
926-
# availability zones do not necessarily end up with the same
927-
# Chassis as the highest priority one.
928-
for ch in random.sample(list(az_chassis), len(az_chassis)):
1004+
max_chassis_number -= 1
1005+
LOG.debug('Keeping chassis %s as the highest priority chassis '
1006+
'for HA Chassis Group %s', high_prio_ch.chassis_name,
1007+
hcg_info.group_name)
1008+
1009+
# random.sample() second parameter needs to be <= the list size,
1010+
# that's why we need to check for the max value here
1011+
max_chassis_number = min(max_chassis_number, len(candidates))
1012+
# Limit the number of members and randomize the order so each group,
1013+
# even if they belong to the same availability zones, do not
1014+
# necessarily end up with the same Chassis as the highest priority one.
1015+
for ch in random.sample(list(candidates), max_chassis_number):
9291016
txn.add(nb_idl.ha_chassis_group_add_chassis(
930-
hcg_cmd, ch, priority=priority))
1017+
hcg_info.group_name, ch, priority=priority))
9311018
priority -= 1
9321019

1020+
LOG.info('HA Chassis Group %s synchronized', hcg_info.group_name)
9331021
# Return the existing register UUID or the HA chassis group creation
9341022
# command (see ovsdbapp ``HAChassisGroupAddChassisCommand`` class).
935-
return ha_ch_grp.uuid if ha_ch_grp else hcg_cmd
1023+
return ha_ch_grp.uuid if ha_ch_grp else ha_ch_grp_cmd
9361024

9371025

9381026
def get_subnets_address_scopes(context, subnets, fixed_ips, ml2_plugin):
@@ -1090,3 +1178,9 @@ def get_requested_chassis(requested_chassis):
10901178
if isinstance(requested_chassis, str):
10911179
return requested_chassis.split(',')
10921180
return []
1181+
1182+
1183+
# TODO(lucasagomes): Remove this function when the additional_chassis column
1184+
# becomes the norm and older versions of OVN are no longer supported
1185+
def is_additional_chassis_supported(idl):
1186+
return idl.is_col_present('Port_Binding', 'additional_chassis')

neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,22 @@ def set_port_status_up(self, port_id):
11491149
LOG.debug('Port not found during OVN status up report: %s',
11501150
port_id)
11511151

1152+
# NOTE(lucasagomes): If needed, re-sync the HA Chassis Group for
1153+
# the external port removing the chassis which the port is bound
1154+
# to from the group so the external port does not live in the
1155+
# same chassis as the VM
1156+
if (ovn_utils.is_port_external(db_port) and
1157+
self.sb_ovn.get_extport_chassis_from_cms_options()):
1158+
try:
1159+
with self.nb_ovn.transaction(check_error=True) as txn:
1160+
ovn_utils.sync_ha_chassis_group(
1161+
admin_context, db_port['id'], db_port['network_id'],
1162+
self.nb_ovn, self.sb_ovn, txn)
1163+
except Exception as e:
1164+
LOG.error('Error while syncing the HA Chassis Group for the '
1165+
'external port %s during set port status up. '
1166+
'Error: %s', db_port['id'], e)
1167+
11521168
def set_port_status_down(self, port_id):
11531169
# Port provisioning is required now that OVN has reported that the
11541170
# port is down. Insert a provisioning block and mark the port down

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/api.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,20 @@ def get_gateway_chassis_from_cms_options(self, name_only=True):
659659
:returns: List with chassis.
660660
"""
661661

662+
@abc.abstractmethod
663+
def get_extport_chassis_from_cms_options(self):
664+
"""Get chassis eligible for hosting external ports from CMS options.
665+
666+
When the admin wants to host external ports on chassis other than
667+
the gateway chassis, the option can be set as follows:
668+
669+
ovs-vsctl set open .
670+
external_ids:ovn-cms-options="enable-chassis-as-extport-host"
671+
In this function, we parse ovn-cms-options and return these chassis
672+
673+
:returns: List with chassis
674+
"""
675+
662676
@abc.abstractmethod
663677
def get_chassis_and_physnets(self):
664678
"""Return a dict contains chassis name and physnets mapping.
@@ -674,3 +688,11 @@ def get_all_chassis(self, chassis_type=None):
674688
:param chassis_type: The type of chassis
675689
:type chassis_type: string
676690
"""
691+
692+
@abc.abstractmethod
693+
def get_chassis_host_for_port(self, port_id):
694+
"""Return a set with the names of the Chassis hosting the port
695+
696+
:param port_id: The port ID
697+
:type port_id: string
698+
"""

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/impl_idl_ovn.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -875,9 +875,11 @@ def get_chassis_hostname_and_physnets(self):
875875
def get_gateway_chassis_from_cms_options(self, name_only=True):
876876
return [ch.name if name_only else ch
877877
for ch in self.chassis_list().execute(check_error=True)
878-
if ovn_const.CMS_OPT_CHASSIS_AS_GW in
879-
utils.get_ovn_chassis_other_config(ch).get(
880-
ovn_const.OVN_CMS_OPTIONS, '').split(',')]
878+
if utils.is_gateway_chassis(ch)]
879+
880+
def get_extport_chassis_from_cms_options(self):
881+
return [ch for ch in self.chassis_list().execute(check_error=True)
882+
if utils.is_extport_host_chassis(ch)]
881883

882884
def get_chassis_and_physnets(self):
883885
chassis_info_dict = {}
@@ -964,3 +966,19 @@ def get_ports_on_chassis(self, chassis):
964966
def db_set(self, table, record, *col_values, if_exists=True, **columns):
965967
return cmd.DbSetCommand(self, table, record, *col_values,
966968
if_exists=if_exists, **columns)
969+
970+
def get_chassis_host_for_port(self, port_id):
971+
chassis = set()
972+
cmd = self.db_find_rows('Port_Binding', ('logical_port', '=', port_id))
973+
for row in cmd.execute(check_error=True):
974+
try:
975+
chassis.add(row.chassis[0].name)
976+
except IndexError:
977+
# Do not short-circuit here. Proceed to additional
978+
# chassis handling
979+
pass
980+
981+
if utils.is_additional_chassis_supported(self):
982+
for ch in row.additional_chassis:
983+
chassis.add(ch.name)
984+
return chassis

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,8 @@ def check_for_ha_chassis_group(self):
606606
ovn_const.OVN_NETWORK_NAME_EXT_ID_KEY].replace(
607607
ovn_const.OVN_NAME_PREFIX, '')
608608
ha_ch_grp = utils.sync_ha_chassis_group(
609-
context, network_id, self._nb_idl, self._sb_idl, txn)
609+
context, port.name, network_id, self._nb_idl,
610+
self._sb_idl, txn)
610611
txn.add(self._nb_idl.set_lswitch_port(
611612
port.name, ha_chassis_group=ha_ch_grp))
612613

0 commit comments

Comments
 (0)