Skip to content

Commit 61983d3

Browse files
committed
bgpd: Optimize BGP path lookup using typesafe hash for efficient lookup
Problem: -------- BGP path lookup currently uses O(N) linear search through the list (dest->info) for every incoming route update. In high-ECMP scenarios, each update requires iterating through the entire path list to check if a path from that peer already exists. This becomes a severe performance bottleneck in data center environments with high ECMP during large-scale route updates churn. Solution: --------- Implement O(1) path lookup using FRR's typesafe hash in bgp_dest. Use the same in bgp_update() for efficient lookup. This introduces new hash in BGPd but improves CPU overhead ~60% Signed-off-by: Krishnasamy <krishnasamyr@nvidia.com>
1 parent 2fcb0b2 commit 61983d3

File tree

4 files changed

+212
-6
lines changed

4 files changed

+212
-6
lines changed

bgpd/bgp_route.c

Lines changed: 194 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "zclient.h"
3232
#include "frrdistance.h"
3333
#include "frregex_real.h"
34+
#include "jhash.h"
3435

3536
#include "bgpd/bgpd.h"
3637
#include "bgpd/bgp_nhc.h"
@@ -125,6 +126,40 @@ static const struct message bgp_pmsi_tnltype_str[] = {
125126

126127
static int clear_batch_rib_helper(struct bgp_clearing_info *cinfo);
127128

129+
/*
130+
* BGP Path Info Typesafe Hash Function
131+
*/
132+
int bgp_pi_hash_cmp(const struct bgp_path_info *p1, const struct bgp_path_info *p2)
133+
{
134+
if (p1->peer != p2->peer)
135+
return (p1->peer < p2->peer) ? -1 : 1;
136+
137+
if (p1->type != p2->type)
138+
return (p1->type < p2->type) ? -1 : 1;
139+
140+
if (p1->sub_type != p2->sub_type)
141+
return (p1->sub_type < p2->sub_type) ? -1 : 1;
142+
143+
if (p1->addpath_rx_id != p2->addpath_rx_id)
144+
return (p1->addpath_rx_id < p2->addpath_rx_id) ? -1 : 1;
145+
146+
/* All equal */
147+
return 0;
148+
}
149+
150+
/* Generate hash value from bgp_path_info key fields */
151+
uint32_t bgp_pi_hash_hashfn(const struct bgp_path_info *pi)
152+
{
153+
uint32_t h = 0;
154+
155+
h = jhash_1word((uint32_t)(uintptr_t)pi->peer, h);
156+
h = jhash_1word(pi->addpath_rx_id, h);
157+
h = jhash_1word((uint32_t)pi->type, h);
158+
h = jhash_1word((uint32_t)pi->sub_type, h);
159+
160+
return h;
161+
}
162+
128163
static inline char *bgp_route_dump_path_info_flags(struct bgp_path_info *pi,
129164
char *buf, size_t len)
130165
{
@@ -519,13 +554,21 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest,
519554
frrtrace(3, frr_bgp, bgp_path_info_add, dest, pi, name);
520555
struct bgp_path_info *top;
521556

557+
/*
558+
* Initialize typesafe hash on first path add.
559+
* XCALLOC zeroes dest, so count is 0 initially.
560+
*/
561+
if (bgp_pi_hash_count(&dest->pi_hash) == 0)
562+
bgp_pi_hash_init(&dest->pi_hash);
563+
522564
top = bgp_dest_get_bgp_path_info(dest);
523565

524566
pi->next = top;
525567
pi->prev = NULL;
526568
if (top)
527569
top->prev = pi;
528570
bgp_dest_set_bgp_path_info(dest, pi);
571+
bgp_pi_hash_add(&dest->pi_hash, pi);
529572

530573
SET_FLAG(pi->flags, BGP_PATH_UNSORTED);
531574
bgp_path_info_lock(pi);
@@ -552,6 +595,7 @@ struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest,
552595
pi->next = NULL;
553596
pi->prev = NULL;
554597

598+
bgp_pi_hash_del(&dest->pi_hash, pi);
555599
if (pi->peer)
556600
pi->peer->stat_pfx_loc_rib--;
557601
hook_call(bgp_snmp_update_stats, dest, pi, false);
@@ -566,6 +610,7 @@ static struct bgp_dest *bgp_path_info_reap_unsorted(struct bgp_dest *dest,
566610
pi->next = NULL;
567611
pi->prev = NULL;
568612

613+
bgp_pi_hash_del(&dest->pi_hash, pi);
569614
if (pi->peer)
570615
pi->peer->stat_pfx_loc_rib--;
571616
hook_call(bgp_snmp_update_stats, dest, pi, false);
@@ -5324,12 +5369,12 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
53245369
bgp_adj_in_set(dest, peer, attr, addpath_id, &bgp_labels);
53255370
}
53265371

5327-
/* Check previously received route. */
5328-
for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next)
5329-
if (pi->peer == peer && pi->type == type
5330-
&& pi->sub_type == sub_type
5331-
&& pi->addpath_rx_id == addpath_id)
5332-
break;
5372+
/* Check previously received route using O(1) typesafe hash lookup */
5373+
struct bgp_path_info pi_lookup = {
5374+
.peer = peer, .type = type, .sub_type = sub_type, .addpath_rx_id = addpath_id
5375+
};
5376+
5377+
pi = bgp_pi_hash_find(&dest->pi_hash, &pi_lookup);
53335378

53345379
/* AS path local-as loop check. */
53355380
if (peer->change_local_as) {
@@ -17807,6 +17852,148 @@ void bgp_config_write_distance(struct vty *vty, struct bgp *bgp, afi_t afi,
1780717852
}
1780817853
}
1780917854

17855+
/* Show BGP path info hash keys */
17856+
DEFUN(show_bgp_paths_hash,
17857+
show_bgp_paths_hash_cmd,
17858+
"show [ip] bgp [<view|vrf> VIEWVRFNAME] ["BGP_AFI_CMD_STR" ["BGP_SAFI_WITH_LABEL_CMD_STR"]]<A.B.C.D|A.B.C.D/M|X:X::X:X|X:X::X:X/M> paths-hash [json]",
17859+
SHOW_STR
17860+
IP_STR
17861+
BGP_STR
17862+
BGP_INSTANCE_HELP_STR
17863+
BGP_AFI_HELP_STR
17864+
BGP_SAFI_WITH_LABEL_HELP_STR
17865+
"Network in the BGP routing table to display\n"
17866+
"IPv4 prefix\n"
17867+
"Network in the BGP routing table to display\n"
17868+
"IPv6 prefix\n"
17869+
"Display hash keys for all paths to this prefix\n"
17870+
JSON_STR)
17871+
{
17872+
struct bgp *bgp = NULL;
17873+
afi_t afi = AFI_IP6;
17874+
safi_t safi = SAFI_UNICAST;
17875+
struct prefix prefix;
17876+
struct bgp_dest *dest;
17877+
struct bgp_path_info *pi;
17878+
int idx = 0;
17879+
bool uj = use_json(argc, argv);
17880+
json_object *json = NULL;
17881+
json_object *json_paths = NULL;
17882+
json_object *json_path = NULL;
17883+
char peer_str[INET6_ADDRSTRLEN];
17884+
int path_count = 0;
17885+
17886+
bgp_vty_find_and_parse_afi_safi_bgp(vty, argv, argc, &idx, &afi, &safi, &bgp, uj);
17887+
if (!idx)
17888+
return CMD_WARNING;
17889+
17890+
if (!argv_find(argv, argc, "A.B.C.D", &idx))
17891+
if (!argv_find(argv, argc, "X:X::X:X", &idx))
17892+
if (!argv_find(argv, argc, "A.B.C.D/M", &idx))
17893+
argv_find(argv, argc, "X:X::X:X/M", &idx);
17894+
17895+
if (str2prefix(argv[idx]->arg, &prefix) == 0) {
17896+
vty_out(vty, "%% Malformed prefix\n");
17897+
return CMD_WARNING;
17898+
}
17899+
17900+
/* Check if AFI matches prefix */
17901+
if ((afi == AFI_IP && prefix.family != AF_INET) ||
17902+
(afi == AFI_IP6 && prefix.family != AF_INET6)) {
17903+
vty_out(vty, "%% Prefix does not match AFI\n");
17904+
return CMD_WARNING;
17905+
}
17906+
17907+
dest = bgp_node_lookup(bgp->rib[afi][safi], &prefix);
17908+
if (!dest) {
17909+
if (uj) {
17910+
json = json_object_new_object();
17911+
json_object_string_add(json, "warning", "Network not in table");
17912+
vty_json(vty, json);
17913+
} else
17914+
vty_out(vty, "%% Network not in table\n");
17915+
return CMD_WARNING;
17916+
}
17917+
17918+
if (uj)
17919+
json = json_object_new_object();
17920+
17921+
/* Display hash information */
17922+
if (!uj) {
17923+
vty_out(vty, "BGP routing table entry for %s\n",
17924+
prefix2str(&prefix, peer_str, sizeof(peer_str)));
17925+
vty_out(vty, "Hash Keys for all paths:\n\n");
17926+
vty_out(vty, "%-45s %-15s %-15s %-20s %s\n", "Peer", "Type", "Sub-Type",
17927+
"AddPath-Rx-ID", "Path-Ptr");
17928+
vty_out(vty,
17929+
"------------------------------------------------------------------------------------------------------------------------\n");
17930+
} else {
17931+
json_paths = json_object_new_array();
17932+
}
17933+
17934+
/* Iterate through all paths in hash and display hash keys */
17935+
frr_each (bgp_pi_hash, &dest->pi_hash, pi) {
17936+
path_count++;
17937+
17938+
if (pi->peer) {
17939+
if (pi->peer->connection && pi->peer->connection->su_remote) {
17940+
sockunion2str(pi->peer->connection->su_remote, peer_str,
17941+
sizeof(peer_str));
17942+
} else
17943+
snprintf(peer_str, sizeof(peer_str), "%s", pi->peer->host);
17944+
} else {
17945+
snprintf(peer_str, sizeof(peer_str), "Unknown");
17946+
}
17947+
17948+
if (uj) {
17949+
char pi_ptr_str[32];
17950+
17951+
json_path = json_object_new_object();
17952+
json_object_string_add(json_path, "peer", peer_str);
17953+
json_object_int_add(json_path, "type", pi->type);
17954+
json_object_int_add(json_path, "subType", pi->sub_type);
17955+
json_object_int_add(json_path, "addpathRxId", pi->addpath_rx_id);
17956+
17957+
/* Show path info pointer */
17958+
snprintf(pi_ptr_str, sizeof(pi_ptr_str), "%p", pi);
17959+
json_object_string_add(json_path, "path-info", pi_ptr_str);
17960+
17961+
json_object_array_add(json_paths, json_path);
17962+
} else {
17963+
vty_out(vty, "%-45s %-15u %-15u %-20u %p\n", peer_str, pi->type,
17964+
pi->sub_type, pi->addpath_rx_id, pi);
17965+
}
17966+
}
17967+
17968+
if (uj) {
17969+
/* Add typesafe hash table information using APIs */
17970+
size_t hash_count = bgp_pi_hash_count(&dest->pi_hash);
17971+
17972+
json_object_object_add(json, "paths", json_paths);
17973+
json_object_int_add(json, "totalPaths", path_count);
17974+
if (hash_count > 0) {
17975+
json_object_int_add(json, "hashBuckets", HASH_SIZE(dest->pi_hash.hh));
17976+
json_object_int_add(json, "hashCount", hash_count);
17977+
}
17978+
17979+
vty_json(vty, json);
17980+
} else {
17981+
/* Display typesafe hash table info using APIs */
17982+
size_t hash_count = bgp_pi_hash_count(&dest->pi_hash);
17983+
17984+
vty_out(vty, "\nTotal paths: %d\n", path_count);
17985+
if (hash_count > 0) {
17986+
vty_out(vty, "Hash table: %u buckets, %zu entries\n",
17987+
HASH_SIZE(dest->pi_hash.hh), hash_count);
17988+
} else {
17989+
vty_out(vty, "Hash table: not initialized\n");
17990+
}
17991+
}
17992+
17993+
bgp_dest_unlock_node(dest);
17994+
return CMD_SUCCESS;
17995+
}
17996+
1781017997
/* Allocate routing table structure and install commands. */
1781117998
void bgp_route_init(void)
1781217999
{
@@ -17954,6 +18141,7 @@ void bgp_route_init(void)
1795418141

1795518142
install_element(VIEW_NODE, &show_bgp_listeners_cmd);
1795618143
install_element(VIEW_NODE, &show_bgp_peerhash_cmd);
18144+
install_element(VIEW_NODE, &show_bgp_paths_hash_cmd);
1795718145
}
1795818146

1795918147
void bgp_route_finish(void)

bgpd/bgp_route.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "hook.h"
1212
#include "queue.h"
1313
#include "nexthop.h"
14+
#include "typesafe.h"
1415
#include "bgp_table.h"
1516
#include "bgp_addpath_types.h"
1617
#include "bgp_rpki.h"
@@ -275,6 +276,8 @@ struct bgp_path_info {
275276
struct bgp_path_info *next;
276277
struct bgp_path_info *prev;
277278

279+
struct bgp_pi_hash_item pi_hash_link;
280+
278281
/* For nexthop linked list */
279282
LIST_ENTRY(bgp_path_info) nh_thread;
280283

@@ -372,6 +375,11 @@ struct bgp_path_info {
372375
} mplsvpn;
373376
};
374377

378+
extern int bgp_pi_hash_cmp(const struct bgp_path_info *p1, const struct bgp_path_info *p2);
379+
extern uint32_t bgp_pi_hash_hashfn(const struct bgp_path_info *pi);
380+
381+
DECLARE_HASH(bgp_pi_hash, struct bgp_path_info, pi_hash_link, bgp_pi_hash_cmp, bgp_pi_hash_hashfn);
382+
375383
/* Structure used in BGP path selection */
376384
struct bgp_path_info_pair {
377385
struct bgp_path_info *old;

bgpd/bgp_table.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ inline struct bgp_dest *bgp_dest_unlock_node(struct bgp_dest *dest)
8383

8484
if (rn->lock == 1) {
8585
struct bgp_table *rt = bgp_dest_table(dest);
86+
87+
bgp_pi_hash_fini(&dest->pi_hash);
8688
if (rt->bgp) {
8789
bgp_addpath_free_node_data(&rt->bgp->tx_addpath,
8890
&dest->tx_addpath, rt->afi,
@@ -108,6 +110,7 @@ static void bgp_node_destroy(route_table_delegate_t *delegate,
108110
dest = bgp_dest_from_rnode(node);
109111
rt = table->info;
110112
if (dest) {
113+
bgp_pi_hash_fini(&dest->pi_hash);
111114
if (rt->bgp) {
112115
bgp_addpath_free_node_data(&rt->bgp->tx_addpath,
113116
&dest->tx_addpath,

bgpd/bgp_table.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,14 @@
1010
#include "table.h"
1111
#include "queue.h"
1212
#include "linklist.h"
13+
#include "typesafe.h"
1314
#include "bgpd.h"
1415
#include "bgp_advertise.h"
1516
#include "bgp_attr_srv6.h"
1617

18+
/* Predeclare typesafe hash for path_info lookup */
19+
PREDECL_HASH(bgp_pi_hash);
20+
1721
struct bgp_table {
1822
/* table belongs to this instance */
1923
struct bgp *bgp;
@@ -70,6 +74,9 @@ struct bgp_dest {
7074

7175
void *info;
7276

77+
/* Typesafe hash for efficient path lookup */
78+
struct bgp_pi_hash_head pi_hash;
79+
7380
struct bgp_adj_out_rb adj_out;
7481

7582
struct bgp_adj_in *adj_in;

0 commit comments

Comments
 (0)