Skip to content

Commit 47187e1

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "[OVN] Remove SB "Chassis"/"Chassis_Private" duplicated registers" into stable/yoga
2 parents 9190ad6 + ba16962 commit 47187e1

File tree

3 files changed

+132
-0
lines changed

3 files changed

+132
-0
lines changed

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,63 @@ def check_fair_meter_consistency(self):
997997
from_reload=True)
998998
raise periodics.NeverAgain()
999999

1000+
@periodics.periodic(spacing=300, run_immediately=True)
1001+
def remove_duplicated_chassis_registers(self):
1002+
"""Remove the "Chassis" and "Chassis_Private" duplicated registers.
1003+
1004+
When the ovn-controller service of a node is updated and the system-id
1005+
is changed, if the old service is not stopped gracefully, it will leave
1006+
a "Chassis" and a "Chassis_Private" registers on the OVN SB database.
1007+
These leftovers must be removed.
1008+
1009+
NOTE: this method is executed every 5 minutes. If a new chassis is
1010+
added, this method will perform again the clean-up process.
1011+
1012+
NOTE: this method can be executed only if the OVN SB has the
1013+
"Chassis_Private" table. Otherwise, is not possible to find out which
1014+
register is newer and thus must be kept in the database.
1015+
"""
1016+
if not self._sb_idl.is_table_present('Chassis_Private'):
1017+
raise periodics.NeverAgain()
1018+
1019+
if not self.has_lock:
1020+
return
1021+
1022+
# dup_chassis_port_host = {host_name: [(ch1, ch_private1),
1023+
# (ch2, ch_private2), ... ]}
1024+
dup_chassis_port_host = {}
1025+
chassis = self._sb_idl.chassis_list().execute(check_error=True)
1026+
chassis_hostnames = {ch.hostname for ch in chassis}
1027+
# Find the duplicated "Chassis" and "Chassis_Private" registers,
1028+
# comparing the hostname.
1029+
for hostname in chassis_hostnames:
1030+
ch_list = []
1031+
# Find these chassis matching the hostname and create a list.
1032+
for ch in (ch for ch in chassis if ch.hostname == hostname):
1033+
ch_private = self._sb_idl.lookup('Chassis_Private', ch.name,
1034+
default=None)
1035+
if ch_private:
1036+
ch_list.append((ch, ch_private))
1037+
1038+
# If the chassis list > 1, then we have duplicated chassis.
1039+
if len(ch_list) > 1:
1040+
# Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer
1041+
# (greater value) to older.
1042+
ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True)
1043+
dup_chassis_port_host[hostname] = ch_list
1044+
1045+
if not dup_chassis_port_host:
1046+
return
1047+
1048+
# Remove the "Chassis" and "Chassis_Private" registers with the
1049+
# older Chassis_Private.nb_cfg_timestamp.
1050+
with self._sb_idl.transaction(check_error=True) as txn:
1051+
for ch_list in dup_chassis_port_host.values():
1052+
# The first item is skipped, this is the newest element.
1053+
for ch, ch_private in ch_list[1:]:
1054+
for table in ('Chassis_Private', 'Chassis'):
1055+
txn.add(self._sb_idl.db_destroy(table, ch.name))
1056+
10001057

10011058
class HashRingHealthCheckPeriodics(object):
10021059

neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,64 @@ def _verify_lb(test, protocol, vip_ext_port, vip_int_port):
10491049
# Assert load balancer for port forwarding is gone
10501050
self.assertFalse(self._find_pf_lb(router_id, fip_id))
10511051

1052+
def test_remove_duplicated_chassis_registers(self):
1053+
hostnames = ['host1', 'host2']
1054+
for hostname in hostnames:
1055+
for _ in range(3):
1056+
self.add_fake_chassis(hostname)
1057+
1058+
chassis = self.sb_api.chassis_list().execute(check_error=True)
1059+
self.assertEqual(6, len(chassis))
1060+
# Make the chassis private timestamp different
1061+
for idx, ch in enumerate(chassis):
1062+
self.sb_api.db_set('Chassis_Private', ch.name,
1063+
('nb_cfg_timestamp', idx)).execute()
1064+
1065+
ch_private_dict = {} # host: [ch_private1, ch_private2, ...]
1066+
for hostname in hostnames:
1067+
ch_private_list = []
1068+
for ch in (ch for ch in chassis if ch.hostname == hostname):
1069+
ch_private = self.sb_api.lookup('Chassis_Private', ch.name,
1070+
default=None)
1071+
if ch_private:
1072+
# One of the "Chassis_Private" has been deleted on purpose
1073+
# in this test.
1074+
ch_private_list.append(ch_private)
1075+
ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp,
1076+
reverse=True)
1077+
ch_private_dict[hostname] = ch_private_list
1078+
1079+
self.maint.remove_duplicated_chassis_registers()
1080+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
1081+
self.assertEqual(2, len(chassis_result))
1082+
for ch in chassis_result:
1083+
self.assertIn(ch.hostname, hostnames)
1084+
hostnames.remove(ch.hostname)
1085+
# From ch_private_dict[ch.hostname], we retrieve the first
1086+
# "Chassis_Private" register because these are ordered by
1087+
# timestamp. The newer one (bigger timestamp) should remain in the
1088+
# system.
1089+
ch_expected = ch_private_dict[ch.hostname][0].chassis[0]
1090+
self.assertEqual(ch_expected.name, ch.name)
1091+
1092+
def test_remove_duplicated_chassis_registers_no_ch_private_register(self):
1093+
for _ in range(2):
1094+
self.add_fake_chassis('host1')
1095+
1096+
chassis = self.sb_api.chassis_list().execute(check_error=True)
1097+
self.assertEqual(2, len(chassis))
1098+
# Make the chassis private timestamp different
1099+
# Delete on of the "Chassis_Private" registers.
1100+
self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute()
1101+
self.sb_api.db_set('Chassis_Private', chassis[1].name,
1102+
('nb_cfg_timestamp', 1)).execute()
1103+
1104+
self.maint.remove_duplicated_chassis_registers()
1105+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
1106+
# Both "Chassis" registers are still in the DB because one
1107+
# "Chassis_Private" register was missing.
1108+
self.assertEqual(2, len(chassis_result))
1109+
10521110

10531111
class TestLogMaintenance(_TestMaintenanceHelper,
10541112
test_log_driver.LogApiTestCaseBase):
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
issues:
3+
- |
4+
When using ML2/OVN, during an upgrade procedure, the OVS system-id stored
5+
value can be changed. The ovn-controller service will create the "Chassis"
6+
and "Chassis_Private" registers based on this OVS system-id. If the
7+
ovn-controller process is not gracefully stopped, that could lead to the
8+
existence of duplicated "Chassis" and "Chassis_Private" registers in the
9+
OVN Southbound database.
10+
fixes:
11+
- |
12+
A new OVN maintenance method ``remove_duplicated_chassis_registers`` is
13+
added. This method will periodically check the OVN Southbound "Chassis"
14+
and "Chassis_Private" tables looking for duplicated registers. The older
15+
ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be
16+
removed when more than one register has the same hostname, that should
17+
be unique.

0 commit comments

Comments
 (0)