Skip to content

Commit ba16962

Browse files
committed
[OVN] Remove SB "Chassis"/"Chassis_Private" duplicated registers
A new OVN maintenance method is added. This method lists all existing OVN SB Chassis registers and checks if any of them has the same hostname. In case of having duplicated "Chassis"/"Chassis_Private" registers, the maintenance method will remove those with older (lower) timestamp, that is stored in "Chassis_Private.nb_cfg_timestamp", leaving only the newer one. Conflicts: neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py Closes-Bug: #2016158 Change-Id: Ib3c6f0dc01efd31430691e720ba23ccb4ede65fa (cherry picked from commit 9d9f47c) (cherry picked from commit 7fc12de)
1 parent 6309414 commit ba16962

File tree

3 files changed

+132
-0
lines changed

3 files changed

+132
-0
lines changed

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,63 @@ def check_fair_meter_consistency(self):
997997
from_reload=True)
998998
raise periodics.NeverAgain()
999999

1000+
@periodics.periodic(spacing=300, run_immediately=True)
1001+
def remove_duplicated_chassis_registers(self):
1002+
"""Remove the "Chassis" and "Chassis_Private" duplicated registers.
1003+
1004+
When the ovn-controller service of a node is updated and the system-id
1005+
is changed, if the old service is not stopped gracefully, it will leave
1006+
a "Chassis" and a "Chassis_Private" registers on the OVN SB database.
1007+
These leftovers must be removed.
1008+
1009+
NOTE: this method is executed every 5 minutes. If a new chassis is
1010+
added, this method will perform again the clean-up process.
1011+
1012+
NOTE: this method can be executed only if the OVN SB has the
1013+
"Chassis_Private" table. Otherwise, is not possible to find out which
1014+
register is newer and thus must be kept in the database.
1015+
"""
1016+
if not self._sb_idl.is_table_present('Chassis_Private'):
1017+
raise periodics.NeverAgain()
1018+
1019+
if not self.has_lock:
1020+
return
1021+
1022+
# dup_chassis_port_host = {host_name: [(ch1, ch_private1),
1023+
# (ch2, ch_private2), ... ]}
1024+
dup_chassis_port_host = {}
1025+
chassis = self._sb_idl.chassis_list().execute(check_error=True)
1026+
chassis_hostnames = {ch.hostname for ch in chassis}
1027+
# Find the duplicated "Chassis" and "Chassis_Private" registers,
1028+
# comparing the hostname.
1029+
for hostname in chassis_hostnames:
1030+
ch_list = []
1031+
# Find these chassis matching the hostname and create a list.
1032+
for ch in (ch for ch in chassis if ch.hostname == hostname):
1033+
ch_private = self._sb_idl.lookup('Chassis_Private', ch.name,
1034+
default=None)
1035+
if ch_private:
1036+
ch_list.append((ch, ch_private))
1037+
1038+
# If the chassis list > 1, then we have duplicated chassis.
1039+
if len(ch_list) > 1:
1040+
# Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer
1041+
# (greater value) to older.
1042+
ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True)
1043+
dup_chassis_port_host[hostname] = ch_list
1044+
1045+
if not dup_chassis_port_host:
1046+
return
1047+
1048+
# Remove the "Chassis" and "Chassis_Private" registers with the
1049+
# older Chassis_Private.nb_cfg_timestamp.
1050+
with self._sb_idl.transaction(check_error=True) as txn:
1051+
for ch_list in dup_chassis_port_host.values():
1052+
# The first item is skipped, this is the newest element.
1053+
for ch, ch_private in ch_list[1:]:
1054+
for table in ('Chassis_Private', 'Chassis'):
1055+
txn.add(self._sb_idl.db_destroy(table, ch.name))
1056+
10001057

10011058
class HashRingHealthCheckPeriodics(object):
10021059

neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,64 @@ def _verify_lb(test, protocol, vip_ext_port, vip_int_port):
10491049
# Assert load balancer for port forwarding is gone
10501050
self.assertFalse(self._find_pf_lb(router_id, fip_id))
10511051

1052+
def test_remove_duplicated_chassis_registers(self):
1053+
hostnames = ['host1', 'host2']
1054+
for hostname in hostnames:
1055+
for _ in range(3):
1056+
self.add_fake_chassis(hostname)
1057+
1058+
chassis = self.sb_api.chassis_list().execute(check_error=True)
1059+
self.assertEqual(6, len(chassis))
1060+
# Make the chassis private timestamp different
1061+
for idx, ch in enumerate(chassis):
1062+
self.sb_api.db_set('Chassis_Private', ch.name,
1063+
('nb_cfg_timestamp', idx)).execute()
1064+
1065+
ch_private_dict = {} # host: [ch_private1, ch_private2, ...]
1066+
for hostname in hostnames:
1067+
ch_private_list = []
1068+
for ch in (ch for ch in chassis if ch.hostname == hostname):
1069+
ch_private = self.sb_api.lookup('Chassis_Private', ch.name,
1070+
default=None)
1071+
if ch_private:
1072+
# One of the "Chassis_Private" has been deleted on purpose
1073+
# in this test.
1074+
ch_private_list.append(ch_private)
1075+
ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp,
1076+
reverse=True)
1077+
ch_private_dict[hostname] = ch_private_list
1078+
1079+
self.maint.remove_duplicated_chassis_registers()
1080+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
1081+
self.assertEqual(2, len(chassis_result))
1082+
for ch in chassis_result:
1083+
self.assertIn(ch.hostname, hostnames)
1084+
hostnames.remove(ch.hostname)
1085+
# From ch_private_dict[ch.hostname], we retrieve the first
1086+
# "Chassis_Private" register because these are ordered by
1087+
# timestamp. The newer one (bigger timestamp) should remain in the
1088+
# system.
1089+
ch_expected = ch_private_dict[ch.hostname][0].chassis[0]
1090+
self.assertEqual(ch_expected.name, ch.name)
1091+
1092+
def test_remove_duplicated_chassis_registers_no_ch_private_register(self):
1093+
for _ in range(2):
1094+
self.add_fake_chassis('host1')
1095+
1096+
chassis = self.sb_api.chassis_list().execute(check_error=True)
1097+
self.assertEqual(2, len(chassis))
1098+
# Make the chassis private timestamp different
1099+
# Delete on of the "Chassis_Private" registers.
1100+
self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute()
1101+
self.sb_api.db_set('Chassis_Private', chassis[1].name,
1102+
('nb_cfg_timestamp', 1)).execute()
1103+
1104+
self.maint.remove_duplicated_chassis_registers()
1105+
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
1106+
# Both "Chassis" registers are still in the DB because one
1107+
# "Chassis_Private" register was missing.
1108+
self.assertEqual(2, len(chassis_result))
1109+
10521110

10531111
class TestLogMaintenance(_TestMaintenanceHelper,
10541112
test_log_driver.LogApiTestCaseBase):
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
issues:
3+
- |
4+
When using ML2/OVN, during an upgrade procedure, the OVS system-id stored
5+
value can be changed. The ovn-controller service will create the "Chassis"
6+
and "Chassis_Private" registers based on this OVS system-id. If the
7+
ovn-controller process is not gracefully stopped, that could lead to the
8+
existence of duplicated "Chassis" and "Chassis_Private" registers in the
9+
OVN Southbound database.
10+
fixes:
11+
- |
12+
A new OVN maintenance method ``remove_duplicated_chassis_registers`` is
13+
added. This method will periodically check the OVN Southbound "Chassis"
14+
and "Chassis_Private" tables looking for duplicated registers. The older
15+
ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be
16+
removed when more than one register has the same hostname, that should
17+
be unique.

0 commit comments

Comments
 (0)