Skip to content

Commit a1547ab

Browse files
committed
[OVN] Remove SB "Chassis"/"Chassis_Private" duplicated registers
A new OVN maintenance method is added. This method lists all existing OVN SB Chassis registers and checks if any of them has the same hostname. In case of having duplicated "Chassis"/"Chassis_Private" registers, the maintenance method will remove those with older (lower) timestamp, that is stored in "Chassis_Private.nb_cfg_timestamp", leaving only the newer one. Conflicts: neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py Closes-Bug: #2016158 Change-Id: Ib3c6f0dc01efd31430691e720ba23ccb4ede65fa (cherry picked from commit 9d9f47c) (cherry picked from commit 7fc12de)
1 parent d539460 commit a1547ab

File tree

3 files changed

+132
-0
lines changed

3 files changed

+132
-0
lines changed

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,63 @@ def check_fair_meter_consistency(self):
975975
from_reload=True)
976976
raise periodics.NeverAgain()
977977

978+
@periodics.periodic(spacing=300, run_immediately=True)
979+
def remove_duplicated_chassis_registers(self):
980+
"""Remove the "Chassis" and "Chassis_Private" duplicated registers.
981+
982+
When the ovn-controller service of a node is updated and the system-id
983+
is changed, if the old service is not stopped gracefully, it will leave
984+
a "Chassis" and a "Chassis_Private" registers on the OVN SB database.
985+
These leftovers must be removed.
986+
987+
NOTE: this method is executed every 5 minutes. If a new chassis is
988+
added, this method will perform again the clean-up process.
989+
990+
NOTE: this method can be executed only if the OVN SB has the
991+
"Chassis_Private" table. Otherwise, is not possible to find out which
992+
register is newer and thus must be kept in the database.
993+
"""
994+
if not self._sb_idl.is_table_present('Chassis_Private'):
995+
raise periodics.NeverAgain()
996+
997+
if not self.has_lock:
998+
return
999+
1000+
# dup_chassis_port_host = {host_name: [(ch1, ch_private1),
1001+
# (ch2, ch_private2), ... ]}
1002+
dup_chassis_port_host = {}
1003+
chassis = self._sb_idl.chassis_list().execute(check_error=True)
1004+
chassis_hostnames = {ch.hostname for ch in chassis}
1005+
# Find the duplicated "Chassis" and "Chassis_Private" registers,
1006+
# comparing the hostname.
1007+
for hostname in chassis_hostnames:
1008+
ch_list = []
1009+
# Find these chassis matching the hostname and create a list.
1010+
for ch in (ch for ch in chassis if ch.hostname == hostname):
1011+
ch_private = self._sb_idl.lookup('Chassis_Private', ch.name,
1012+
default=None)
1013+
if ch_private:
1014+
ch_list.append((ch, ch_private))
1015+
1016+
# If the chassis list > 1, then we have duplicated chassis.
1017+
if len(ch_list) > 1:
1018+
# Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer
1019+
# (greater value) to older.
1020+
ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True)
1021+
dup_chassis_port_host[hostname] = ch_list
1022+
1023+
if not dup_chassis_port_host:
1024+
return
1025+
1026+
# Remove the "Chassis" and "Chassis_Private" registers with the
1027+
# older Chassis_Private.nb_cfg_timestamp.
1028+
with self._sb_idl.transaction(check_error=True) as txn:
1029+
for ch_list in dup_chassis_port_host.values():
1030+
# The first item is skipped, this is the newest element.
1031+
for ch, ch_private in ch_list[1:]:
1032+
for table in ('Chassis_Private', 'Chassis'):
1033+
txn.add(self._sb_idl.db_destroy(table, ch.name))
1034+
9781035

9791036
class HashRingHealthCheckPeriodics(object):
9801037

neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,64 @@ def _verify_lb(test, protocol, vip_ext_port, vip_int_port):
950950
# Assert load balancer for port forwarding is gone
951951
self.assertFalse(self._find_pf_lb(router_id, fip_id))
952952

953+
def test_remove_duplicated_chassis_registers(self):
954+
hostnames = ['host1', 'host2']
955+
for hostname in hostnames:
956+
for _ in range(3):
957+
self.add_fake_chassis(hostname)
958+
959+
chassis = self.sb_api.chassis_list().execute(check_error=True)
960+
self.assertEqual(6, len(chassis))
961+
# Make the chassis private timestamp different
962+
for idx, ch in enumerate(chassis):
963+
self.sb_api.db_set('Chassis_Private', ch.name,
964+
('nb_cfg_timestamp', idx)).execute()
965+
966+
ch_private_dict = {} # host: [ch_private1, ch_private2, ...]
967+
for hostname in hostnames:
968+
ch_private_list = []
969+
for ch in (ch for ch in chassis if ch.hostname == hostname):
970+
ch_private = self.sb_api.lookup('Chassis_Private', ch.name,
971+
default=None)
972+
if ch_private:
973+
# One of the "Chassis_Private" has been deleted on purpose
974+
# in this test.
975+
ch_private_list.append(ch_private)
976+
ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp,
977+
reverse=True)
978+
ch_private_dict[hostname] = ch_private_list
979+
980+
self.maint.remove_duplicated_chassis_registers()
981+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
982+
self.assertEqual(2, len(chassis_result))
983+
for ch in chassis_result:
984+
self.assertIn(ch.hostname, hostnames)
985+
hostnames.remove(ch.hostname)
986+
# From ch_private_dict[ch.hostname], we retrieve the first
987+
# "Chassis_Private" register because these are ordered by
988+
# timestamp. The newer one (bigger timestamp) should remain in the
989+
# system.
990+
ch_expected = ch_private_dict[ch.hostname][0].chassis[0]
991+
self.assertEqual(ch_expected.name, ch.name)
992+
993+
def test_remove_duplicated_chassis_registers_no_ch_private_register(self):
994+
for _ in range(2):
995+
self.add_fake_chassis('host1')
996+
997+
chassis = self.sb_api.chassis_list().execute(check_error=True)
998+
self.assertEqual(2, len(chassis))
999+
# Make the chassis private timestamp different
1000+
# Delete on of the "Chassis_Private" registers.
1001+
self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute()
1002+
self.sb_api.db_set('Chassis_Private', chassis[1].name,
1003+
('nb_cfg_timestamp', 1)).execute()
1004+
1005+
self.maint.remove_duplicated_chassis_registers()
1006+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
1007+
# Both "Chassis" registers are still in the DB because one
1008+
# "Chassis_Private" register was missing.
1009+
self.assertEqual(2, len(chassis_result))
1010+
9531011

9541012
class TestLogMaintenance(_TestMaintenanceHelper,
9551013
test_log_driver.LogApiTestCaseBase):
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
issues:
3+
- |
4+
When using ML2/OVN, during an upgrade procedure, the OVS system-id stored
5+
value can be changed. The ovn-controller service will create the "Chassis"
6+
and "Chassis_Private" registers based on this OVS system-id. If the
7+
ovn-controller process is not gracefully stopped, that could lead to the
8+
existence of duplicated "Chassis" and "Chassis_Private" registers in the
9+
OVN Southbound database.
10+
fixes:
11+
- |
12+
A new OVN maintenance method ``remove_duplicated_chassis_registers`` is
13+
added. This method will periodically check the OVN Southbound "Chassis"
14+
and "Chassis_Private" tables looking for duplicated registers. The older
15+
ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be
16+
removed when more than one register has the same hostname, that should
17+
be unique.

0 commit comments

Comments
 (0)