Skip to content

Commit 1fa808a

Browse files
Ndancejicstepanblyschak
authored andcommitted
[muxorch] Fix handling mux neighbors learned after route (#3937)
* [muxorch] Fix handling mux neighbors learned after route Mux neighbors that were learned in standby state when a route entry is present were not being disabled properly due to still being referenced by the route. This change calls updateNextHopRoutes and decreases the nexthop refcount before disabling the neighbor, allowing the neighbor to be removed. Also applies fix that passes in the port_name on initial fdb add. What I did Update routes pointing to newly learned neighbor in standby state and update refcount Update refcount for routes updated when neighbor is learned in active state Moved update logic to after neighbor gets added to mux port Pass port name when adding Fdb entry Add vstest to test changes Why I did it Neighbors added while a route is present were not being disabled due to nonzero refcounts. This caused traffic to blackhole and dualtor_neighbor_check.py to fail due to mismatch between APPL and ASIC state. How I verified it Added vstest with test cases of adding neighbor while route exists in standby and active and toggling between the two states.
1 parent 925ce60 commit 1fa808a

File tree

7 files changed

+171
-13
lines changed

7 files changed

+171
-13
lines changed

orchagent/fdborch.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,8 @@ void FdbOrch::doTask(Consumer& consumer)
855855
}
856856
}
857857

858+
// set entry port_name, which is used in mux fdb update logic
859+
entry.port_name = port;
858860

859861
FdbData fdbData;
860862
fdbData.bridge_port_id = SAI_NULL_OBJECT_ID;

orchagent/muxorch.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,6 @@ void MuxCable::updateNeighbor(NextHopKey nh, bool add)
658658
SWSS_LOG_NOTICE("Processing update on neighbor %s for mux %s, add %d, state %d",
659659
nh.ip_address.to_string().c_str(), mux_name_.c_str(), add, state_);
660660
sai_object_id_t tnh = mux_orch_->getNextHopTunnelId(MUX_TUNNEL, peer_ip4_);
661-
nbr_handler_->update(nh, tnh, add, state_);
662661
if (add)
663662
{
664663
mux_orch_->addNexthop(nh, mux_name_);
@@ -667,7 +666,7 @@ void MuxCable::updateNeighbor(NextHopKey nh, bool add)
667666
{
668667
mux_orch_->removeNexthop(nh);
669668
}
670-
669+
nbr_handler_->update(nh, tnh, add, state_);
671670
updateRoutesForNextHop(nh);
672671
}
673672

@@ -742,9 +741,12 @@ void MuxNbrHandler::update(NextHopKey nh, sai_object_id_t tunnelId, bool add, Mu
742741
neighbors_[nh.ip_address] = gNeighOrch->getLocalNextHopId(nh);
743742
gNeighOrch->enableNeighbor(nh);
744743
gRouteOrch->updateNextHopRoutes(nh, num_routes);
744+
gNeighOrch->increaseNextHopRefCount(nh, num_routes);
745745
break;
746746
case MuxState::MUX_STATE_STANDBY:
747747
neighbors_[nh.ip_address] = tunnelId;
748+
gRouteOrch->updateNextHopRoutes(nh, num_routes);
749+
gNeighOrch->decreaseNextHopRefCount(nh, num_routes);
748750
gNeighOrch->disableNeighbor(nh);
749751
updateTunnelRoute(nh, true);
750752
create_route(pfx, tunnelId);
@@ -1561,6 +1563,7 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
15611563
auto it = mux_nexthop_tb_.find(update.entry);
15621564
if (it != mux_nexthop_tb_.end())
15631565
{
1566+
SWSS_LOG_INFO("port %s, nexthop %s", port.c_str(), it->second.c_str());
15641567
port = it->second;
15651568
removeNexthop(update.entry);
15661569
}
@@ -1770,6 +1773,26 @@ bool MuxOrch::handleMuxCfg(const Request& request)
17701773
(MuxCable(port_name, srv_ip, srv_ip6, mux_peer_switch_, cable_type));
17711774
addSkipNeighbors(skip_neighbors);
17721775

1776+
// Add neighbors that were learned before this mux port was configured.
1777+
NeighborTable m_neighbors;
1778+
gNeighOrch->getMuxNeighborsForPort(port_name, m_neighbors);
1779+
for (const auto &entry : m_neighbors)
1780+
{
1781+
bool nexthop_found = containsNextHop(entry.first);
1782+
bool is_skip_neighbor = isSkipNeighbor(entry.first.ip_address);
1783+
if (!nexthop_found && !is_skip_neighbor)
1784+
{
1785+
SWSS_LOG_NOTICE("Neighbor %s on %s learned before mux port %s configured. updating...",
1786+
entry.first.ip_address.to_string().c_str(),
1787+
entry.second.mac.to_string().c_str(),
1788+
port_name.c_str()
1789+
);
1790+
1791+
NeighborUpdate neighbor_update = {entry.first, entry.second.mac, 1};
1792+
updateNeighbor(neighbor_update);
1793+
}
1794+
}
1795+
17731796
SWSS_LOG_NOTICE("Mux entry for port '%s' was added, cable type %d", port_name.c_str(), cable_type);
17741797
}
17751798
else

orchagent/muxorch.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ class MuxOrch : public Orch2, public Observer, public Subject
250250
enable_cache_neigh_updates_ = false;
251251
}
252252
void updateCachedNeighbors();
253+
bool getMuxPort(const MacAddress&, const string&, string&);
253254

254255
private:
255256
virtual bool addOperation(const Request& request);
@@ -261,8 +262,6 @@ class MuxOrch : public Orch2, public Observer, public Subject
261262
void updateNeighbor(const NeighborUpdate&);
262263
void updateFdb(const FdbUpdate&);
263264

264-
bool getMuxPort(const MacAddress&, const string&, string&);
265-
266265
/***
267266
* Methods for managing tunnel routes for neighbor IPs not associated
268267
* with a specific mux cable

orchagent/neighorch.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,30 @@ void NeighOrch::doTask(Consumer &consumer)
10061006
}
10071007
}
10081008

1009+
/* Gets all neighbor entries tied to a given mux port */
1010+
void NeighOrch::getMuxNeighborsForPort(string port_name, NeighborTable& m_neighbors)
1011+
{
1012+
SWSS_LOG_INFO("Getting mux neighbors on %s", port_name.c_str());
1013+
1014+
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
1015+
string mux_port_name;
1016+
for (const auto &entry : m_syncdNeighbors)
1017+
{
1018+
// Check if mux port exists for given neighbor entry
1019+
mux_port_name = "";
1020+
if (!mux_orch->getMuxPort(entry.second.mac, entry.first.alias, mux_port_name) || mux_port_name.empty())
1021+
{
1022+
continue;
1023+
}
1024+
1025+
// Add to m_neighbors if entry found
1026+
if (mux_port_name == port_name)
1027+
{
1028+
m_neighbors.insert(entry);
1029+
}
1030+
}
1031+
}
1032+
10091033
bool NeighOrch::addNeighbor(NeighborContext& ctx)
10101034
{
10111035
SWSS_LOG_ENTER();

orchagent/neighorch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ class NeighOrch : public Orch, public Subject, public Observer
109109
void resolveNeighbor(const NeighborEntry &);
110110
void updateSrv6Nexthop(const NextHopKey &, const sai_object_id_t &);
111111
bool ifChangeInformRemoteNextHop(const string &, bool);
112+
void getMuxNeighborsForPort(string port_name, NeighborTable &m_neighbors);
112113

113114
void clearBulkers();
114115

orchagent/routeorch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1854,8 +1854,8 @@ bool RouteOrch::updateNextHopRoutes(const NextHopKey& nextHop, uint32_t& numRout
18541854
continue;
18551855
}
18561856

1857-
SWSS_LOG_INFO("Updating route %s", (*rt).prefix.to_string().c_str());
18581857
next_hop_id = m_neighOrch->getNextHopId(nextHop);
1858+
SWSS_LOG_INFO("Updating route %s with nexthop %" PRIu64, (*rt).prefix.to_string().c_str(), (uint64_t)next_hop_id);
18591859

18601860
route_entry.vr_id = (*rt).vrf_id;
18611861
route_entry.switch_id = gSwitchId;

tests/test_mux.py

Lines changed: 117 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -223,11 +223,14 @@ def check_neigh_in_asic_db(self, asicdb, ip, expected=True):
223223

224224
return ''
225225

226-
def check_tnl_nexthop_in_asic_db(self, asicdb, expected=1):
226+
def check_tnl_nexthop_in_asic_db(self, asicdb, expected=None):
227227

228228
global tunnel_nh_id
229229

230-
nh = asicdb.wait_for_n_keys(self.ASIC_NEXTHOP_TABLE, expected)
230+
if expected:
231+
nh = asicdb.wait_for_n_keys(self.ASIC_NEXTHOP_TABLE, expected)
232+
else:
233+
nh = asicdb.get_keys(self.ASIC_NEXTHOP_TABLE)
231234

232235
for key in nh:
233236
fvs = asicdb.get_entry(self.ASIC_NEXTHOP_TABLE, key)
@@ -525,12 +528,15 @@ def create_and_test_fdb(self, appdb, asicdb, dvs, dvs_route):
525528

526529
self.del_fdb(dvs, "00-00-00-00-00-11")
527530

528-
def create_and_test_route(self, appdb, asicdb, dvs, dvs_route):
529-
531+
def create_and_test_route(self, appdb, asicdb, dvs, dvs_route, mac_Ethernet0, mac_Ethernet4):
530532
self.set_mux_state(appdb, "Ethernet0", "active")
531533

532534
rtprefix = "2.3.4.0/24"
533535

536+
# Make sure neighbor is present
537+
self.add_neighbor(dvs, self.SERV1_IPV4, mac_Ethernet0)
538+
self.add_neighbor(dvs, self.SERV2_IPV4, mac_Ethernet0)
539+
534540
dvs.runcmd(
535541
"vtysh -c \"configure terminal\" -c \"ip route " + rtprefix +
536542
" " + self.SERV1_IPV4 + "\""
@@ -546,6 +552,7 @@ def create_and_test_route(self, appdb, asicdb, dvs, dvs_route):
546552
# Change Mux state to Standby and verify route pointing to Tunnel
547553
self.set_mux_state(appdb, "Ethernet0", "standby")
548554

555+
self.check_tnl_nexthop_in_asic_db(asicdb)
549556
self.check_nexthop_in_asic_db(asicdb, rtkeys[0], True)
550557

551558
# Change Mux state back to Active and verify route is not pointing to Tunnel
@@ -581,6 +588,39 @@ def create_and_test_route(self, appdb, asicdb, dvs, dvs_route):
581588
" " + self.SERV1_IPV4 + "\""
582589
)
583590

591+
def create_and_test_route_learned_before_neighbor(self, appdb, asicdb, dvs, dvs_route, mac):
592+
rtprefix = "2.3.4.0/24"
593+
neigh = "192.168.0.110"
594+
mux_port = "Ethernet0"
595+
596+
nexthop_map = {"active": neigh, "standby": tunnel_nh_id}
597+
toggle_map = {"active": "standby", "standby": "active"}
598+
599+
for state in ["standby", "active"]:
600+
try:
601+
current_state = state
602+
self.set_mux_state(appdb, mux_port, current_state)
603+
604+
self.add_route(dvs, rtprefix, [neigh])
605+
time.sleep(1)
606+
self.add_neighbor(dvs, neigh, mac)
607+
608+
# Confirm route is pointing to tunnel nh
609+
self.check_route_nexthop(dvs_route, asicdb, rtprefix, nexthop_map[current_state], (current_state == "standby"))
610+
611+
# Toggle the mux a few times
612+
current_state = toggle_map[current_state]
613+
self.set_mux_state(appdb, mux_port, current_state)
614+
self.check_route_nexthop(dvs_route, asicdb, rtprefix, nexthop_map[current_state], (current_state == "standby"))
615+
616+
current_state = toggle_map[current_state]
617+
self.set_mux_state(appdb, mux_port, current_state)
618+
self.check_route_nexthop(dvs_route, asicdb, rtprefix, nexthop_map[current_state], (current_state == "standby"))
619+
620+
finally:
621+
self.del_route(dvs, rtprefix)
622+
self.del_neighbor(dvs, neigh)
623+
584624
def multi_nexthop_check(self, asicdb, dvs_route, route, nexthops, mux_states, non_mux_nexthop=None, expect_active=None):
585625
"""
586626
Checks if multi-mux route points to an active nexthop or tunnel.
@@ -1574,7 +1614,6 @@ def setup(self, dvs):
15741614
self.remove_qos_map(db, swsscommon.CFG_DSCP_TO_TC_MAP_TABLE_NAME, dscp_to_tc_map_oid)
15751615
self.remove_qos_map(db, swsscommon.CFG_TC_TO_PRIORITY_GROUP_MAP_TABLE_NAME, tc_to_pg_map_oid)
15761616

1577-
15781617
def test_Tunnel(self, dvs, setup_tunnel, restore_tunnel, testlog, setup):
15791618
""" test IPv4 Mux tunnel creation """
15801619
db = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
@@ -1602,6 +1641,73 @@ def test_Peer(self, dvs, setup_peer_switch, setup_tunnel, setup, testlog):
16021641

16031642
self.create_and_test_peer(asicdb, encap_tc_to_dscp_map_id, encap_tc_to_queue_map_id)
16041643

1644+
def test_neighbor_learned_before_mux_config(self, dvs, dvs_route, setup, setup_vlan, setup_peer_switch, setup_tunnel, testlog):
1645+
""" test neighbors learned before mux config """
1646+
test_ip_v4 = "192.168.0.110"
1647+
test_ip_v6 = "fc02:1000::110"
1648+
1649+
toggle_map = {"active": "standby", "standby": "active"}
1650+
1651+
asicdb = dvs.get_asic_db()
1652+
config_db = dvs.get_config_db()
1653+
appdb = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
1654+
1655+
dvs.runcmd("ip neigh flush all")
1656+
for create_route in [False, True]:
1657+
for state in ["active", "standby"]:
1658+
try:
1659+
current_state = state
1660+
1661+
# Step 1.a: add neighbor on port
1662+
self.add_fdb(dvs, "Ethernet4", "00-00-00-11-11-11")
1663+
1664+
self.add_neighbor(dvs, test_ip_v4, "00:00:00:11:11:11")
1665+
self.check_neigh_in_asic_db(asicdb, test_ip_v4, expected=True)
1666+
1667+
self.add_neighbor(dvs, test_ip_v6, "00:00:00:11:11:11")
1668+
self.check_neigh_in_asic_db(asicdb, test_ip_v6, expected=True)
1669+
1670+
if create_route:
1671+
# Step 1.b: Create a route pointing to the neighbor
1672+
self.add_route(dvs, "11.11.11.11/32", ["192.168.0.100"])
1673+
1674+
# Step 2: configure mux port and verify neighbor state.
1675+
self.set_mux_state(appdb, "Ethernet4", current_state)
1676+
fvs = {"server_ipv4": self.SERV2_IPV4+self.IPV4_MASK,
1677+
"server_ipv6": self.SERV2_IPV6+self.IPV6_MASK}
1678+
config_db.create_entry(self.CONFIG_MUX_CABLE, "Ethernet4", fvs)
1679+
1680+
self.check_neigh_in_asic_db(asicdb, test_ip_v4, expected=(current_state != "standby"))
1681+
self.check_tunnel_route_in_app_db(dvs, [test_ip_v4+self.IPV4_MASK], expected=(current_state == "standby"))
1682+
self.check_neigh_in_asic_db(asicdb, test_ip_v6, expected=(current_state != "standby"))
1683+
self.check_tunnel_route_in_app_db(dvs, [test_ip_v6+self.IPV6_MASK], expected=(current_state == "standby"))
1684+
1685+
# Step 3: toggle mux state and verify neighbor state.
1686+
current_state = toggle_map[current_state]
1687+
self.set_mux_state(appdb, "Ethernet4", current_state)
1688+
1689+
self.check_neigh_in_asic_db(asicdb, test_ip_v4, expected=(current_state != "standby"))
1690+
self.check_tunnel_route_in_app_db(dvs, [test_ip_v4+self.IPV4_MASK], expected=(current_state == "standby"))
1691+
self.check_neigh_in_asic_db(asicdb, test_ip_v6, expected=(current_state != "standby"))
1692+
self.check_tunnel_route_in_app_db(dvs, [test_ip_v6+self.IPV6_MASK], expected=(current_state == "standby"))
1693+
1694+
# Step 4: toggle mux state back to initial state and verify neighbor state.
1695+
current_state = toggle_map[current_state]
1696+
self.set_mux_state(appdb, "Ethernet4", current_state)
1697+
1698+
self.check_neigh_in_asic_db(asicdb, test_ip_v4, expected=(current_state != "standby"))
1699+
self.check_tunnel_route_in_app_db(dvs, [test_ip_v4+self.IPV4_MASK], expected=(current_state == "standby"))
1700+
self.check_neigh_in_asic_db(asicdb, test_ip_v6, expected=(current_state != "standby"))
1701+
self.check_tunnel_route_in_app_db(dvs, [test_ip_v6+self.IPV6_MASK], expected=(current_state == "standby"))
1702+
1703+
finally:
1704+
if create_route:
1705+
self.del_route(dvs, "11.11.11.11/32")
1706+
self.del_neighbor(dvs, test_ip_v4)
1707+
self.del_neighbor(dvs, test_ip_v6)
1708+
config_db.delete_entry(self.CONFIG_MUX_CABLE, "Ethernet4")
1709+
dvs.runcmd("ip neigh flush all")
1710+
16051711
def test_Neighbor(self, dvs, dvs_route, setup_vlan, setup_mux_cable, testlog):
16061712
""" test Neighbor entries and mux state change """
16071713

@@ -1619,13 +1725,16 @@ def test_Fdb(self, dvs, dvs_route, testlog):
16191725

16201726
self.create_and_test_fdb(appdb, asicdb, dvs, dvs_route)
16211727

1622-
def test_Route(self, dvs, dvs_route, testlog):
1728+
def test_Route(self, dvs, intf_fdb_map, dvs_route, setup, setup_vlan, setup_peer_switch, setup_tunnel, setup_mux_cable, testlog):
16231729
""" test Route entries and mux state change """
16241730

16251731
appdb = swsscommon.DBConnector(swsscommon.APPL_DB, dvs.redis_sock, 0)
16261732
asicdb = dvs.get_asic_db()
1733+
mac_Ethernet0 = intf_fdb_map["Ethernet0"]
1734+
mac_Ethernet4 = intf_fdb_map["Ethernet4"]
16271735

1628-
self.create_and_test_route(appdb, asicdb, dvs, dvs_route)
1736+
self.create_and_test_route(appdb, asicdb, dvs, dvs_route, mac_Ethernet0, mac_Ethernet4)
1737+
self.create_and_test_route_learned_before_neighbor(appdb, asicdb, dvs, dvs_route, mac_Ethernet0)
16291738

16301739
def test_NH(self, dvs, dvs_route, intf_fdb_map, setup, setup_mux_cable,
16311740
setup_peer_switch, setup_tunnel, testlog):
@@ -1635,7 +1744,7 @@ def test_NH(self, dvs, dvs_route, intf_fdb_map, setup, setup_mux_cable,
16351744
mac = intf_fdb_map["Ethernet0"]
16361745

16371746
# get tunnel nexthop
1638-
self.check_tnl_nexthop_in_asic_db(asicdb, 5)
1747+
self.check_tnl_nexthop_in_asic_db(asicdb)
16391748

16401749
self.create_and_test_NH_routes(appdb, asicdb, dvs, dvs_route, mac)
16411750

0 commit comments

Comments
 (0)