Skip to content

Commit 9857288

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "[OVN] Remove SB "Chassis"/"Chassis_Private" duplicated registers" into stable/zed
2 parents 758de1f + a1547ab commit 9857288

File tree

3 files changed

+132
-0
lines changed

3 files changed

+132
-0
lines changed

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,63 @@ def check_fair_meter_consistency(self):
975975
from_reload=True)
976976
raise periodics.NeverAgain()
977977

978+
@periodics.periodic(spacing=300, run_immediately=True)
979+
def remove_duplicated_chassis_registers(self):
980+
"""Remove the "Chassis" and "Chassis_Private" duplicated registers.
981+
982+
When the ovn-controller service of a node is updated and the system-id
983+
is changed, if the old service is not stopped gracefully, it will leave
984+
a "Chassis" and a "Chassis_Private" registers on the OVN SB database.
985+
These leftovers must be removed.
986+
987+
NOTE: this method is executed every 5 minutes. If a new chassis is
988+
added, this method will perform again the clean-up process.
989+
990+
NOTE: this method can be executed only if the OVN SB has the
991+
"Chassis_Private" table. Otherwise, is not possible to find out which
992+
register is newer and thus must be kept in the database.
993+
"""
994+
if not self._sb_idl.is_table_present('Chassis_Private'):
995+
raise periodics.NeverAgain()
996+
997+
if not self.has_lock:
998+
return
999+
1000+
# dup_chassis_port_host = {host_name: [(ch1, ch_private1),
1001+
# (ch2, ch_private2), ... ]}
1002+
dup_chassis_port_host = {}
1003+
chassis = self._sb_idl.chassis_list().execute(check_error=True)
1004+
chassis_hostnames = {ch.hostname for ch in chassis}
1005+
# Find the duplicated "Chassis" and "Chassis_Private" registers,
1006+
# comparing the hostname.
1007+
for hostname in chassis_hostnames:
1008+
ch_list = []
1009+
# Find these chassis matching the hostname and create a list.
1010+
for ch in (ch for ch in chassis if ch.hostname == hostname):
1011+
ch_private = self._sb_idl.lookup('Chassis_Private', ch.name,
1012+
default=None)
1013+
if ch_private:
1014+
ch_list.append((ch, ch_private))
1015+
1016+
# If the chassis list > 1, then we have duplicated chassis.
1017+
if len(ch_list) > 1:
1018+
# Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer
1019+
# (greater value) to older.
1020+
ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True)
1021+
dup_chassis_port_host[hostname] = ch_list
1022+
1023+
if not dup_chassis_port_host:
1024+
return
1025+
1026+
# Remove the "Chassis" and "Chassis_Private" registers with the
1027+
# older Chassis_Private.nb_cfg_timestamp.
1028+
with self._sb_idl.transaction(check_error=True) as txn:
1029+
for ch_list in dup_chassis_port_host.values():
1030+
# The first item is skipped, this is the newest element.
1031+
for ch, ch_private in ch_list[1:]:
1032+
for table in ('Chassis_Private', 'Chassis'):
1033+
txn.add(self._sb_idl.db_destroy(table, ch.name))
1034+
9781035

9791036
class HashRingHealthCheckPeriodics(object):
9801037

neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,64 @@ def _verify_lb(test, protocol, vip_ext_port, vip_int_port):
950950
# Assert load balancer for port forwarding is gone
951951
self.assertFalse(self._find_pf_lb(router_id, fip_id))
952952

953+
def test_remove_duplicated_chassis_registers(self):
954+
hostnames = ['host1', 'host2']
955+
for hostname in hostnames:
956+
for _ in range(3):
957+
self.add_fake_chassis(hostname)
958+
959+
chassis = self.sb_api.chassis_list().execute(check_error=True)
960+
self.assertEqual(6, len(chassis))
961+
# Make the chassis private timestamp different
962+
for idx, ch in enumerate(chassis):
963+
self.sb_api.db_set('Chassis_Private', ch.name,
964+
('nb_cfg_timestamp', idx)).execute()
965+
966+
ch_private_dict = {} # host: [ch_private1, ch_private2, ...]
967+
for hostname in hostnames:
968+
ch_private_list = []
969+
for ch in (ch for ch in chassis if ch.hostname == hostname):
970+
ch_private = self.sb_api.lookup('Chassis_Private', ch.name,
971+
default=None)
972+
if ch_private:
973+
# One of the "Chassis_Private" has been deleted on purpose
974+
# in this test.
975+
ch_private_list.append(ch_private)
976+
ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp,
977+
reverse=True)
978+
ch_private_dict[hostname] = ch_private_list
979+
980+
self.maint.remove_duplicated_chassis_registers()
981+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
982+
self.assertEqual(2, len(chassis_result))
983+
for ch in chassis_result:
984+
self.assertIn(ch.hostname, hostnames)
985+
hostnames.remove(ch.hostname)
986+
# From ch_private_dict[ch.hostname], we retrieve the first
987+
# "Chassis_Private" register because these are ordered by
988+
# timestamp. The newer one (bigger timestamp) should remain in the
989+
# system.
990+
ch_expected = ch_private_dict[ch.hostname][0].chassis[0]
991+
self.assertEqual(ch_expected.name, ch.name)
992+
993+
def test_remove_duplicated_chassis_registers_no_ch_private_register(self):
994+
for _ in range(2):
995+
self.add_fake_chassis('host1')
996+
997+
chassis = self.sb_api.chassis_list().execute(check_error=True)
998+
self.assertEqual(2, len(chassis))
999+
# Make the chassis private timestamp different
1000+
# Delete on of the "Chassis_Private" registers.
1001+
self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute()
1002+
self.sb_api.db_set('Chassis_Private', chassis[1].name,
1003+
('nb_cfg_timestamp', 1)).execute()
1004+
1005+
self.maint.remove_duplicated_chassis_registers()
1006+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
1007+
# Both "Chassis" registers are still in the DB because one
1008+
# "Chassis_Private" register was missing.
1009+
self.assertEqual(2, len(chassis_result))
1010+
9531011

9541012
class TestLogMaintenance(_TestMaintenanceHelper,
9551013
test_log_driver.LogApiTestCaseBase):
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
issues:
3+
- |
4+
When using ML2/OVN, during an upgrade procedure, the OVS system-id stored
5+
value can be changed. The ovn-controller service will create the "Chassis"
6+
and "Chassis_Private" registers based on this OVS system-id. If the
7+
ovn-controller process is not gracefully stopped, that could lead to the
8+
existence of duplicated "Chassis" and "Chassis_Private" registers in the
9+
OVN Southbound database.
10+
fixes:
11+
- |
12+
A new OVN maintenance method ``remove_duplicated_chassis_registers`` is
13+
added. This method will periodically check the OVN Southbound "Chassis"
14+
and "Chassis_Private" tables looking for duplicated registers. The older
15+
ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be
16+
removed when more than one register has the same hostname, that should
17+
be unique.

0 commit comments

Comments
 (0)