Skip to content

Commit 02d2f03

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "[OVN] Remove SB "Chassis"/"Chassis_Private" duplicated registers" into stable/2023.1
2 parents 6471cd5 + 7fc12de commit 02d2f03

File tree

3 files changed

+132
-0
lines changed

3 files changed

+132
-0
lines changed

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,63 @@ def check_fair_meter_consistency(self):
10401040
from_reload=True)
10411041
raise periodics.NeverAgain()
10421042

1043+
@periodics.periodic(spacing=300, run_immediately=True)
1044+
def remove_duplicated_chassis_registers(self):
1045+
"""Remove the "Chassis" and "Chassis_Private" duplicated registers.
1046+
1047+
When the ovn-controller service of a node is updated and the system-id
1048+
is changed, if the old service is not stopped gracefully, it will leave
1049+
a "Chassis" and a "Chassis_Private" registers on the OVN SB database.
1050+
These leftovers must be removed.
1051+
1052+
NOTE: this method is executed every 5 minutes. If a new chassis is
1053+
added, this method will perform again the clean-up process.
1054+
1055+
NOTE: this method can be executed only if the OVN SB has the
1056+
"Chassis_Private" table. Otherwise, is not possible to find out which
1057+
register is newer and thus must be kept in the database.
1058+
"""
1059+
if not self._sb_idl.is_table_present('Chassis_Private'):
1060+
raise periodics.NeverAgain()
1061+
1062+
if not self.has_lock:
1063+
return
1064+
1065+
# dup_chassis_port_host = {host_name: [(ch1, ch_private1),
1066+
# (ch2, ch_private2), ... ]}
1067+
dup_chassis_port_host = {}
1068+
chassis = self._sb_idl.chassis_list().execute(check_error=True)
1069+
chassis_hostnames = {ch.hostname for ch in chassis}
1070+
# Find the duplicated "Chassis" and "Chassis_Private" registers,
1071+
# comparing the hostname.
1072+
for hostname in chassis_hostnames:
1073+
ch_list = []
1074+
# Find these chassis matching the hostname and create a list.
1075+
for ch in (ch for ch in chassis if ch.hostname == hostname):
1076+
ch_private = self._sb_idl.lookup('Chassis_Private', ch.name,
1077+
default=None)
1078+
if ch_private:
1079+
ch_list.append((ch, ch_private))
1080+
1081+
# If the chassis list > 1, then we have duplicated chassis.
1082+
if len(ch_list) > 1:
1083+
# Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer
1084+
# (greater value) to older.
1085+
ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True)
1086+
dup_chassis_port_host[hostname] = ch_list
1087+
1088+
if not dup_chassis_port_host:
1089+
return
1090+
1091+
# Remove the "Chassis" and "Chassis_Private" registers with the
1092+
# older Chassis_Private.nb_cfg_timestamp.
1093+
with self._sb_idl.transaction(check_error=True) as txn:
1094+
for ch_list in dup_chassis_port_host.values():
1095+
# The first item is skipped, this is the newest element.
1096+
for ch, ch_private in ch_list[1:]:
1097+
for table in ('Chassis_Private', 'Chassis'):
1098+
txn.add(self._sb_idl.db_destroy(table, ch.name))
1099+
10431100

10441101
class HashRingHealthCheckPeriodics(object):
10451102

neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,64 @@ def _verify_lb(test, protocol, vip_ext_port, vip_int_port):
950950
# Assert load balancer for port forwarding is gone
951951
self.assertFalse(self._find_pf_lb(router_id, fip_id))
952952

953+
def test_remove_duplicated_chassis_registers(self):
954+
hostnames = ['host1', 'host2']
955+
for hostname in hostnames:
956+
for _ in range(3):
957+
self.add_fake_chassis(hostname)
958+
959+
chassis = self.sb_api.chassis_list().execute(check_error=True)
960+
self.assertEqual(6, len(chassis))
961+
# Make the chassis private timestamp different
962+
for idx, ch in enumerate(chassis):
963+
self.sb_api.db_set('Chassis_Private', ch.name,
964+
('nb_cfg_timestamp', idx)).execute()
965+
966+
ch_private_dict = {} # host: [ch_private1, ch_private2, ...]
967+
for hostname in hostnames:
968+
ch_private_list = []
969+
for ch in (ch for ch in chassis if ch.hostname == hostname):
970+
ch_private = self.sb_api.lookup('Chassis_Private', ch.name,
971+
default=None)
972+
if ch_private:
973+
# One of the "Chassis_Private" has been deleted on purpose
974+
# in this test.
975+
ch_private_list.append(ch_private)
976+
ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp,
977+
reverse=True)
978+
ch_private_dict[hostname] = ch_private_list
979+
980+
self.maint.remove_duplicated_chassis_registers()
981+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
982+
self.assertEqual(2, len(chassis_result))
983+
for ch in chassis_result:
984+
self.assertIn(ch.hostname, hostnames)
985+
hostnames.remove(ch.hostname)
986+
# From ch_private_dict[ch.hostname], we retrieve the first
987+
# "Chassis_Private" register because these are ordered by
988+
# timestamp. The newer one (bigger timestamp) should remain in the
989+
# system.
990+
ch_expected = ch_private_dict[ch.hostname][0].chassis[0]
991+
self.assertEqual(ch_expected.name, ch.name)
992+
993+
def test_remove_duplicated_chassis_registers_no_ch_private_register(self):
994+
for _ in range(2):
995+
self.add_fake_chassis('host1')
996+
997+
chassis = self.sb_api.chassis_list().execute(check_error=True)
998+
self.assertEqual(2, len(chassis))
999+
# Make the chassis private timestamp different
1000+
# Delete on of the "Chassis_Private" registers.
1001+
self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute()
1002+
self.sb_api.db_set('Chassis_Private', chassis[1].name,
1003+
('nb_cfg_timestamp', 1)).execute()
1004+
1005+
self.maint.remove_duplicated_chassis_registers()
1006+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
1007+
# Both "Chassis" registers are still in the DB because one
1008+
# "Chassis_Private" register was missing.
1009+
self.assertEqual(2, len(chassis_result))
1010+
9531011

9541012
class TestLogMaintenance(_TestMaintenanceHelper,
9551013
test_log_driver.LogApiTestCaseBase):
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
issues:
3+
- |
4+
When using ML2/OVN, during an upgrade procedure, the OVS system-id stored
5+
value can be changed. The ovn-controller service will create the "Chassis"
6+
and "Chassis_Private" registers based on this OVS system-id. If the
7+
ovn-controller process is not gracefully stopped, that could lead to the
8+
existence of duplicated "Chassis" and "Chassis_Private" registers in the
9+
OVN Southbound database.
10+
fixes:
11+
- |
12+
A new OVN maintenance method ``remove_duplicated_chassis_registers`` is
13+
added. This method will periodically check the OVN Southbound "Chassis"
14+
and "Chassis_Private" tables looking for duplicated registers. The older
15+
ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be
16+
removed when more than one register has the same hostname, that should
17+
be unique.

0 commit comments

Comments
 (0)