Skip to content

Commit d6490dc

Browse files
committed
bgpd: Optimize BGP path lookup using typesafe hash for efficient lookup
Problem: -------- BGP path lookup currently uses O(N) linear search through the list (dest->info) for every incoming route update. In high-ECMP scenarios, each update requires iterating through the entire path list to check if a path from that peer already exists. This becomes a severe performance bottleneck in data center environments with high ECMP during large-scale route updates churn. Solution: --------- Implement a path_info hash per table using typesafe hash. Use the same in bgp_update() for efficient lookup. This hash lookup improves CPU overhead upto ~60% Signed-off-by: Krishnasamy R <krishnasamyr@nvidia.com>
1 parent 2fcb0b2 commit d6490dc

File tree

5 files changed

+122
-7
lines changed

5 files changed

+122
-7
lines changed

bgpd/bgp_route.c

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "zclient.h"
3232
#include "frrdistance.h"
3333
#include "frregex_real.h"
34+
#include "jhash.h"
3435

3536
#include "bgpd/bgpd.h"
3637
#include "bgpd/bgp_nhc.h"
@@ -125,6 +126,52 @@ static const struct message bgp_pmsi_tnltype_str[] = {
125126

126127
static int clear_batch_rib_helper(struct bgp_clearing_info *cinfo);
127128

129+
/*
130+
* Typesafe Hash Functions for bgp_path_info
131+
* hash key: prefix, peer, type, sub_type, addpath_rx_id
132+
*/
133+
int bgp_pi_hash_cmp(const struct bgp_path_info *p1, const struct bgp_path_info *p2)
134+
{
135+
int ret;
136+
137+
/* Get the prefix from pi->net and compare */
138+
const struct prefix *pfx1 = bgp_dest_get_prefix(p1->net);
139+
const struct prefix *pfx2 = bgp_dest_get_prefix(p2->net);
140+
141+
ret = prefix_cmp(pfx1, pfx2);
142+
if (ret != 0)
143+
return ret;
144+
145+
if (p1->peer != p2->peer)
146+
return (p1->peer < p2->peer) ? -1 : 1;
147+
148+
if (p1->type != p2->type)
149+
return (p1->type < p2->type) ? -1 : 1;
150+
151+
if (p1->sub_type != p2->sub_type)
152+
return (p1->sub_type < p2->sub_type) ? -1 : 1;
153+
154+
if (p1->addpath_rx_id != p2->addpath_rx_id)
155+
return (p1->addpath_rx_id < p2->addpath_rx_id) ? -1 : 1;
156+
157+
/* All fields are equal */
158+
return 0;
159+
}
160+
161+
uint32_t bgp_pi_hash_hashfn(const struct bgp_path_info *pi)
162+
{
163+
uint32_t h = 0;
164+
const struct prefix *pfx = bgp_dest_get_prefix(pi->net);
165+
166+
h = prefix_hash_key(pfx);
167+
h = jhash_1word((uint32_t)(uintptr_t)pi->peer, h);
168+
h = jhash_1word(pi->addpath_rx_id, h);
169+
h = jhash_1word((uint32_t)pi->type, h);
170+
h = jhash_1word((uint32_t)pi->sub_type, h);
171+
172+
return h;
173+
}
174+
128175
static inline char *bgp_route_dump_path_info_flags(struct bgp_path_info *pi,
129176
char *buf, size_t len)
130177
{
@@ -518,6 +565,7 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest,
518565
{
519566
frrtrace(3, frr_bgp, bgp_path_info_add, dest, pi, name);
520567
struct bgp_path_info *top;
568+
struct bgp_table *table;
521569

522570
top = bgp_dest_get_bgp_path_info(dest);
523571

@@ -527,6 +575,11 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest,
527575
top->prev = pi;
528576
bgp_dest_set_bgp_path_info(dest, pi);
529577

578+
/* Add this path info to global hash (per AFI/SAFI table) */
579+
table = bgp_dest_table(dest);
580+
if (table)
581+
bgp_pi_hash_add(&table->pi_hash, pi);
582+
530583
SET_FLAG(pi->flags, BGP_PATH_UNSORTED);
531584
bgp_path_info_lock(pi);
532585
bgp_dest_lock_node(dest);
@@ -542,6 +595,8 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest,
542595
struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest,
543596
struct bgp_path_info *pi)
544597
{
598+
struct bgp_table *table;
599+
545600
if (pi->next)
546601
pi->next->prev = pi->prev;
547602
if (pi->prev)
@@ -552,6 +607,11 @@ struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest,
552607
pi->next = NULL;
553608
pi->prev = NULL;
554609

610+
/* Remove this path from global hash (per AFI/SAFI table) */
611+
table = bgp_dest_table(dest);
612+
if (table)
613+
bgp_pi_hash_del(&table->pi_hash, pi);
614+
555615
if (pi->peer)
556616
pi->peer->stat_pfx_loc_rib--;
557617
hook_call(bgp_snmp_update_stats, dest, pi, false);
@@ -563,9 +623,16 @@ struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest,
563623
static struct bgp_dest *bgp_path_info_reap_unsorted(struct bgp_dest *dest,
564624
struct bgp_path_info *pi)
565625
{
626+
struct bgp_table *table;
627+
566628
pi->next = NULL;
567629
pi->prev = NULL;
568630

631+
/* Remove this path from global hash (per AFI/SAFI table) */
632+
table = bgp_dest_table(dest);
633+
if (table)
634+
bgp_pi_hash_del(&table->pi_hash, pi);
635+
569636
if (pi->peer)
570637
pi->peer->stat_pfx_loc_rib--;
571638
hook_call(bgp_snmp_update_stats, dest, pi, false);
@@ -5265,6 +5332,7 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
52655332
{
52665333
int ret;
52675334
struct bgp_dest *dest;
5335+
struct bgp_table *rib_table;
52685336
struct bgp *bgp;
52695337
struct attr new_attr = {};
52705338
struct attr *attr_new;
@@ -5298,6 +5366,7 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
52985366

52995367
bgp = peer->bgp;
53005368
dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, p, prd);
5369+
rib_table = bgp_dest_table(dest);
53015370

53025371
if (num_labels &&
53035372
((afi == AFI_L2VPN && safi == SAFI_EVPN) || bgp_is_valid_label(&label[0]))) {
@@ -5324,12 +5393,14 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
53245393
bgp_adj_in_set(dest, peer, attr, addpath_id, &bgp_labels);
53255394
}
53265395

5327-
/* Check previously received route. */
5328-
for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next)
5329-
if (pi->peer == peer && pi->type == type
5330-
&& pi->sub_type == sub_type
5331-
&& pi->addpath_rx_id == addpath_id)
5332-
break;
5396+
/* Check previously received route using pi_hash */
5397+
struct bgp_path_info pi_lookup = { .net = dest,
5398+
.peer = peer,
5399+
.type = type,
5400+
.sub_type = sub_type,
5401+
.addpath_rx_id = addpath_id };
5402+
5403+
pi = bgp_pi_hash_find(&rib_table->pi_hash, &pi_lookup);
53335404

53345405
/* AS path local-as loop check. */
53355406
if (peer->change_local_as) {

bgpd/bgp_route.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "hook.h"
1212
#include "queue.h"
1313
#include "nexthop.h"
14+
#include "typesafe.h"
1415
#include "bgp_table.h"
1516
#include "bgp_addpath_types.h"
1617
#include "bgp_rpki.h"
@@ -275,6 +276,9 @@ struct bgp_path_info {
275276
struct bgp_path_info *next;
276277
struct bgp_path_info *prev;
277278

279+
/* Hash linkage for pi_hash in bgp_table */
280+
struct bgp_pi_hash_item pi_hash_link;
281+
278282
/* For nexthop linked list */
279283
LIST_ENTRY(bgp_path_info) nh_thread;
280284

@@ -744,6 +748,11 @@ DECLARE_HOOK(bgp_route_update,
744748
struct bgp_path_info *old_route, struct bgp_path_info *new_route),
745749
(bgp, afi, safi, bn, old_route, new_route));
746750

751+
extern int bgp_pi_hash_cmp(const struct bgp_path_info *p1, const struct bgp_path_info *p2);
752+
extern uint32_t bgp_pi_hash_hashfn(const struct bgp_path_info *pi);
753+
754+
DECLARE_HASH(bgp_pi_hash, struct bgp_path_info, pi_hash_link, bgp_pi_hash_cmp, bgp_pi_hash_hashfn);
755+
747756
/* BGP show options */
748757
#define BGP_SHOW_OPT_JSON (1 << 0)
749758
#define BGP_SHOW_OPT_WIDE (1 << 1)

bgpd/bgp_table.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ void bgp_table_unlock(struct bgp_table *rt)
3232
return;
3333
}
3434

35+
/* Cleanup pi_hash in bgp_table */
36+
bgp_pi_hash_fini(&rt->pi_hash);
37+
3538
route_table_finish(rt->route_table);
3639
rt->route_table = NULL;
3740

@@ -83,6 +86,18 @@ inline struct bgp_dest *bgp_dest_unlock_node(struct bgp_dest *dest)
8386

8487
if (rn->lock == 1) {
8588
struct bgp_table *rt = bgp_dest_table(dest);
89+
struct bgp_path_info *pi;
90+
91+
/*
92+
* Paths from pi_hash are removed via bgp_path_info_reap(),
93+
* but any remaining paths should be removed here.
94+
*/
95+
if (rt && bgp_dest_get_bgp_path_info(dest)) {
96+
for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) {
97+
bgp_pi_hash_del(&rt->pi_hash, pi);
98+
}
99+
}
100+
86101
if (rt->bgp) {
87102
bgp_addpath_free_node_data(&rt->bgp->tx_addpath,
88103
&dest->tx_addpath, rt->afi,
@@ -108,6 +123,18 @@ static void bgp_node_destroy(route_table_delegate_t *delegate,
108123
dest = bgp_dest_from_rnode(node);
109124
rt = table->info;
110125
if (dest) {
126+
struct bgp_path_info *pi;
127+
128+
/*
129+
* Paths from pi_hash are removed via bgp_path_info_reap(),
130+
* but any remaining paths should be removed here.
131+
*/
132+
if (rt && bgp_dest_get_bgp_path_info(dest)) {
133+
for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) {
134+
bgp_pi_hash_del(&rt->pi_hash, pi);
135+
}
136+
}
137+
111138
if (rt->bgp) {
112139
bgp_addpath_free_node_data(&rt->bgp->tx_addpath,
113140
&dest->tx_addpath,
@@ -154,6 +181,7 @@ struct bgp_table *bgp_table_init(struct bgp *bgp, afi_t afi, safi_t safi)
154181
bgp_table_lock(rt);
155182
rt->afi = afi;
156183
rt->safi = safi;
184+
bgp_pi_hash_init(&rt->pi_hash);
157185

158186
return rt;
159187
}

bgpd/bgp_table.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,14 @@
1010
#include "table.h"
1111
#include "queue.h"
1212
#include "linklist.h"
13+
#include "typesafe.h"
1314
#include "bgpd.h"
1415
#include "bgp_advertise.h"
1516
#include "bgp_attr_srv6.h"
1617

18+
/* Typesafe hash for bgp_path_info lookup */
19+
PREDECL_HASH(bgp_pi_hash);
20+
1721
struct bgp_table {
1822
/* table belongs to this instance */
1923
struct bgp *bgp;
@@ -24,6 +28,9 @@ struct bgp_table {
2428

2529
int lock;
2630

31+
/* Hash for bgp_path_info lookups across all prefixes in this table */
32+
struct bgp_pi_hash_head pi_hash;
33+
2734
/* soft_reconfig_table in progress */
2835
bool soft_reconfig_init;
2936
struct event *soft_reconfig_thread;

bgpd/rfapi/rfapi.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1008,7 +1008,7 @@ void add_vnc_route(struct rfapi_descriptor *rfd, /* cookie, VPN UN addr, peer */
10081008
}
10091009
}
10101010

1011-
new = info_make(type, sub_type, 0, rfd->peer, new_attr, NULL);
1011+
new = info_make(type, sub_type, 0, rfd->peer, new_attr, bn);
10121012
SET_FLAG(new->flags, BGP_PATH_VALID);
10131013

10141014
/* save backref to rfapi handle */

0 commit comments

Comments
 (0)