Skip to content

Commit 541eb49

Browse files
committed
Fixes in this commit:
1. RFC violation on session reset. 2. RFC violation on checking of cp->len. 3. RFC violation on the timing of updating remote discriminator. For 1, the previous logic in `bfd_recv_cb` incorrectly discarded all packets with `Your Discriminator` set to 0 if the local session state was `UP` or `INIT`. This violated RFC 5880 Section 6.8.6, which requires accepting such packets (specifically when State is DOWN) to handle remote peer restarts. This discrepancy may cause "zombie sessions" where the local side remained UP ignoring the peer's reset signal. The original check was introduced with the intention to prevent session aliasing, which caused session state flapping. However, the check was too broad. This patch replaces the state-based check with a more strict address-based check: - Removed the conditions of `bfd->ses_state !=` which would block legitimate resets. - Added `match_ip_address` to verify that if a zero-discriminator packet arrives for an UP session. its destination address matches the session's bound local address. - Added the check required by the RFC 5880 that if a zero remote discriminator packet arrives with a state that is not DOWN or ADMIN_DOWN, the packet should be dropped. This allows legitimate peer resets (correct destination) to proceed while dropping aliased packets that causes vibration. For 2, this is a relatively minor one and potentially nitpicking. But RFC 5880 requires that the parser should check `cp->len < 26` if A bit is set, so I include it. For 3, the patch delays the update of remote discriminator to after `bfd_check_auth` returns success. This is required by the RFC. Updating remote discriminator before auth check would incorrectly update local state even when authentication fails, which can be harmful. This patch also modifies `bfd.h` to add a new pair of marco that get and set A bit. It also changes the implementation of `BFD_SETCBIT` to align with other marco's way of implementation. Signed-off-by: spademomo <[email protected]>
1 parent 0a7b0ef commit 541eb49

File tree

523 files changed

+33332
-11883
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

523 files changed

+33332
-11883
lines changed
File renamed without changes.

.github/mergify.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pull_request_rules:
2+
- name: Delete merged branch
3+
conditions:
4+
- merged
5+
actions:
6+
delete_head_branch:

.github/workflows/github-ci.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ jobs:
2424
- { os: 'ubuntu-latest' ,rel: '24.04', name: 'Ubuntu 24.04 amd64 Build',platform : 'amd64_u24' }
2525
- { os: 'ubuntu-22.04-arm' ,rel: '22.04', name: 'Ubuntu 22.04 arm64 Build',platform : 'arm64_u22' }
2626
- { os: 'ubuntu-22.04-arm' ,rel: '24.04', name: 'Ubuntu 24.04 arm64 Build',platform : 'arm64_u24' }
27+
- { os: 'ubuntu-latest' ,rel: '24.04', name: 'Ubuntu 24.04 amd64 LTTng Build',platform : 'amd64_u24_lttng', lttng: 'true' }
2728

2829
steps:
2930
- name: Checkout
@@ -32,7 +33,7 @@ jobs:
3233
fetch-depth: 1
3334
- name: Build docker image
3435
run: |
35-
docker build --no-cache -t frr-${{ matrix.cfg.platform }} --build-arg=UBUNTU_VERSION=${{ matrix.cfg.rel }} -f docker/ubuntu-ci/Dockerfile .
36+
docker build --no-cache -t frr-${{ matrix.cfg.platform }} --build-arg=UBUNTU_VERSION=${{ matrix.cfg.rel }} --build-arg=ENABLE_LTTNG=${{ matrix.cfg.lttng || 'false' }} -f docker/ubuntu-ci/Dockerfile .
3637
docker save --output /tmp/frr-${{ matrix.cfg.platform }}.tar frr-${{ matrix.cfg.platform }}
3738
- name: Upload docker image artifact
3839
uses: actions/upload-artifact@v4
@@ -86,7 +87,9 @@ jobs:
8687
run: |
8788
uname -a
8889
MODPKGVER=$(uname -r)
89-
sudo apt-get update -y
90+
# Retry loop for apt-get update in case of transient mirror failures
91+
APT_RETRIES=3
92+
for i in $(seq 1 ${APT_RETRIES}); do sudo apt-get update -y && break || sleep 10; done
9093
# Github is running old kernels but installing newer packages :(
9194
sudo apt-get install -y linux-modules-extra-azure linux-modules-${MODPKGVER} linux-modules-extra-${MODPKGVER} python3-xmltodict
9295
sudo modprobe vrf || true

alpine/APKBUILD.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ makedepends="ncurses-dev net-snmp-dev gawk texinfo perl
1919
perl pkgconf python3 python3-dev readline readline-dev sqlite-libs pcre2-dev
2020
squashfs-tools sudo tar texinfo xorriso xz-libs py-pip rtrlib rtrlib-dev
2121
py3-sphinx elfutils elfutils-dev protobuf-c-compiler protobuf-c-dev
22-
lua5.3-dev lua5.3 gzip"
22+
lua5.3-dev lua5.3 gzip pytest"
2323
checkdepends="pytest py-setuptools"
2424
install="$pkgname.pre-install $pkgname.pre-deinstall $pkgname.post-deinstall"
2525
subpackages="$pkgname-dev $pkgname-doc $pkgname-dbg"

bfdd/bfd.c

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "lib/network.h"
1919

2020
#include "bfd.h"
21+
#include "bfd_trace.h"
2122

2223
DEFINE_MTYPE_STATIC(BFDD, BFDD_CONFIG, "long-lived configuration memory");
2324
DEFINE_MTYPE_STATIC(BFDD, BFDD_PROFILE, "long-lived profile memory");
@@ -153,6 +154,9 @@ void bfd_profile_apply(const char *profname, struct bfd_session *bs)
153154
/* Point to profile if it exists. */
154155
bs->profile = bp;
155156

157+
if (bp)
158+
frrtrace(2, frr_bfd, profile_apply, bs, profname);
159+
156160
/* Apply configuration. */
157161
bfd_session_apply(bs);
158162
}
@@ -232,10 +236,16 @@ void bfd_session_apply(struct bfd_session *bs)
232236
bfd_set_log_session_changes(bs, bs->peer_profile.log_session_changes);
233237

234238
/* If session interval changed negotiate new timers. */
235-
if (bs->ses_state == PTM_BFD_UP
236-
&& (bs->timers.desired_min_tx != min_tx
237-
|| bs->timers.required_min_rx != min_rx))
239+
if (bs->ses_state == PTM_BFD_UP &&
240+
(bs->timers.desired_min_tx != min_tx || bs->timers.required_min_rx != min_rx)) {
241+
if (bglobal.debug_peer_event) {
242+
zlog_debug("[%s] Timer changed while UP: old_tx=%u new_tx=%u, old_rx=%u new_rx=%u",
243+
bs_to_string(bs), min_tx, bs->timers.desired_min_tx, min_rx,
244+
bs->timers.required_min_rx);
245+
zlog_debug("Starting poll sequence to negotiate new timers");
246+
}
238247
bfd_set_polling(bs);
248+
}
239249

240250
/* Send updated information to data plane. */
241251
bfd_dplane_update_session(bs);
@@ -327,6 +337,7 @@ int bfd_session_enable(struct bfd_session *bs)
327337
if (bs->key.vrfname[0]) {
328338
vrf = vrf_lookup_by_name(bs->key.vrfname);
329339
if (vrf == NULL) {
340+
frrtrace(1, frr_bfd, vrf_not_found, bs->key.vrfname);
330341
zlog_err(
331342
"session-enable: specified VRF %s doesn't exists.",
332343
bs->key.vrfname);
@@ -341,6 +352,7 @@ int bfd_session_enable(struct bfd_session *bs)
341352
if (bs->key.ifname[0]) {
342353
ifp = if_lookup_by_name(bs->key.ifname, vrf->vrf_id);
343354
if (ifp == NULL) {
355+
frrtrace(2, frr_bfd, interface_not_found, bs->key.ifname, vrf->vrf_id);
344356
zlog_err(
345357
"session-enable: specified interface %s (VRF %s) doesn't exist.",
346358
bs->key.ifname, vrf->name);
@@ -415,6 +427,8 @@ int bfd_session_enable(struct bfd_session *bs)
415427
/* initialize RTT */
416428
bfd_rtt_init(bs);
417429

430+
frrtrace(2, frr_bfd, session_enable_event, true, bs);
431+
418432
return 0;
419433
}
420434

@@ -430,6 +444,8 @@ void bfd_session_disable(struct bfd_session *bs)
430444
if (bs->bdc)
431445
return;
432446

447+
frrtrace(2, frr_bfd, session_enable_event, false, bs);
448+
433449
/* Free up socket resources. */
434450
if (bs->sock != -1) {
435451
close(bs->sock);
@@ -562,6 +578,8 @@ void ptm_bfd_echo_stop(struct bfd_session *bfd)
562578

563579
bfd_echo_xmttimer_delete(bfd);
564580
bfd_echo_recvtimer_delete(bfd);
581+
582+
frrtrace(2, frr_bfd, echo_mode_change, bfd, false);
565583
}
566584

567585
void ptm_bfd_echo_start(struct bfd_session *bfd)
@@ -570,6 +588,7 @@ void ptm_bfd_echo_start(struct bfd_session *bfd)
570588
if (bfd->echo_detect_TO > 0) {
571589
bfd_echo_recvtimer_update(bfd);
572590
ptm_bfd_echo_xmt_TO(bfd);
591+
frrtrace(2, frr_bfd, echo_mode_change, bfd, true);
573592
}
574593
}
575594

@@ -589,6 +608,8 @@ void ptm_bfd_sess_up(struct bfd_session *bfd)
589608

590609
ptm_bfd_notify(bfd, bfd->ses_state);
591610

611+
frrtrace(4, frr_bfd, state_change, bfd, old_state, bfd->ses_state, bfd->local_diag);
612+
592613
if (old_state != bfd->ses_state) {
593614
bfd->stats.session_up++;
594615
if (bglobal.debug_peer_event)
@@ -638,6 +659,8 @@ void ptm_bfd_sess_dn(struct bfd_session *bfd, uint8_t diag)
638659
bfd_xmttimer_delete(bfd);
639660
}
640661

662+
frrtrace(4, frr_bfd, state_change, bfd, old_state, bfd->ses_state, bfd->local_diag);
663+
641664
if (old_state != bfd->ses_state) {
642665
bfd->stats.session_down++;
643666
if (bglobal.debug_peer_event)
@@ -1005,6 +1028,8 @@ void bfd_session_free(struct bfd_session *bs)
10051028
{
10061029
struct bfd_session_observer *bso;
10071030

1031+
frrtrace(2, frr_bfd, session_lifecycle, false, bs);
1032+
10081033
bfd_session_disable(bs);
10091034

10101035
/* Remove session from data plane if any. */
@@ -1116,6 +1141,8 @@ struct bfd_session *bs_registrate(struct bfd_session *bfd)
11161141
if (bfd->key.ifname[0] || bfd->key.vrfname[0] || bfd->sock == -1)
11171142
bs_observer_add(bfd);
11181143

1144+
frrtrace(2, frr_bfd, session_lifecycle, true, bfd);
1145+
11191146
if (bglobal.debug_peer_event)
11201147
zlog_debug("session-new: %s", bs_to_string(bfd));
11211148

@@ -1133,6 +1160,7 @@ int ptm_bfd_sess_del(struct bfd_peer_cfg *bpc)
11331160

11341161
/* This pointer is being referenced, don't let it be deleted. */
11351162
if (bs->refcount > 0) {
1163+
frrtrace(1, frr_bfd, refcount_error, bs->refcount);
11361164
zlog_err("session-delete: refcount failure: %" PRIu64" references",
11371165
bs->refcount);
11381166
return -1;
@@ -1157,6 +1185,9 @@ void bfd_set_polling(struct bfd_session *bs)
11571185
*
11581186
* RFC 5880, Section 6.8.3.
11591187
*/
1188+
if (bglobal.debug_peer_event)
1189+
zlog_debug("[%s] Setting polling=1 to negotiate timer change", bs_to_string(bs));
1190+
11601191
bs->polling = 1;
11611192
}
11621193

@@ -1482,13 +1513,27 @@ void bs_final_handler(struct bfd_session *bs)
14821513
bs->xmt_TO = bs->timers.desired_min_tx;
14831514
else
14841515
bs->xmt_TO = bs->remote_timers.required_min_rx;
1485-
1516+
1517+
if (bglobal.debug_peer_event)
1518+
zlog_debug(" calculated xmt_TO=%" PRIu64 " (using slowest rate)", bs->xmt_TO);
1519+
1520+
frrtrace(6, frr_bfd, timer_negotiation, bs, bs->xmt_TO, bs->cur_timers.required_min_rx,
1521+
bs->detect_TO, bs->echo_xmt_TO, bs->echo_detect_TO);
1522+
14861523
/* Only apply increased transmission interval after Poll Sequence */
1487-
if (bs->ses_state == PTM_BFD_UP && bs->xmt_TO > old_xmt_TO) {
1488-
bs->xmt_TO = old_xmt_TO; /* Keep old timing until Poll Sequence done */
1524+
if (bs->ses_state == PTM_BFD_UP && bs->xmt_TO > old_xmt_TO && bs->polling) {
1525+
if (bglobal.debug_peer_event) {
1526+
zlog_debug(" REJECTED increase: xmt_TO=%" PRIu64 " > old=%" PRIu64
1527+
" (polling=%d)",
1528+
bs->xmt_TO, old_xmt_TO, bs->polling);
1529+
}
1530+
bs->xmt_TO = old_xmt_TO; /* Keep old timing DURING Poll Sequence */
14891531
return;
14901532
}
14911533

1534+
if (bglobal.debug_peer_event)
1535+
zlog_debug(" APPLYING new xmt_TO=%" PRIu64, bs->xmt_TO);
1536+
14921537
/* Apply new transmission timer immediately. */
14931538
ptm_bfd_start_xmt_timer(bs, false);
14941539
}
@@ -2442,6 +2487,8 @@ static int bfd_vrf_new(struct vrf *vrf)
24422487
if (bglobal.debug_zebra)
24432488
zlog_debug("VRF Created: %s(%u)", vrf->name, vrf->vrf_id);
24442489

2490+
frrtrace(2, frr_bfd, vrf_lifecycle, 1, vrf->vrf_id);
2491+
24452492
bvrf = XCALLOC(MTYPE_BFDD_VRF, sizeof(struct bfd_vrf_global));
24462493
bvrf->vrf = vrf;
24472494
vrf->info = bvrf;
@@ -2463,6 +2510,8 @@ static int bfd_vrf_delete(struct vrf *vrf)
24632510
if (bglobal.debug_zebra)
24642511
zlog_debug("VRF Deletion: %s(%u)", vrf->name, vrf->vrf_id);
24652512

2513+
frrtrace(2, frr_bfd, vrf_lifecycle, 2, vrf->vrf_id);
2514+
24662515
XFREE(MTYPE_BFDD_VRF, vrf->info);
24672516

24682517
return 0;
@@ -2475,6 +2524,8 @@ static int bfd_vrf_enable(struct vrf *vrf)
24752524
if (bglobal.debug_zebra)
24762525
zlog_debug("VRF enable add %s id %u", vrf->name, vrf->vrf_id);
24772526

2527+
frrtrace(2, frr_bfd, vrf_lifecycle, 3, vrf->vrf_id);
2528+
24782529
/* Don't open sockets when using data plane */
24792530
if (bglobal.bg_use_dplane)
24802531
goto skip_sockets;
@@ -2536,6 +2587,8 @@ static int bfd_vrf_disable(struct vrf *vrf)
25362587
if (bglobal.debug_zebra)
25372588
zlog_debug("VRF disable %s id %d", vrf->name, vrf->vrf_id);
25382589

2590+
frrtrace(2, frr_bfd, vrf_lifecycle, 4, vrf->vrf_id);
2591+
25392592
/* Disable read/write poll triggering. */
25402593
event_cancel(&bvrf->bg_ev[0]);
25412594
event_cancel(&bvrf->bg_ev[1]);

bfdd/bfd.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ struct bfd_echo_pkt {
168168
#define BFD_DEMANDBIT 0x02
169169
#define BFD_MBIT 0x01
170170
#define BFD_GETMBIT(flags) (CHECK_FLAG(flags, BFD_MBIT))
171+
#define BFD_GETDEMANDBIT(flags) (CHECK_FLAG(flags, BFD_DEMANDBIT))
171172
#define BFD_SETDEMANDBIT(flags, val) \
172173
{ \
173174
if ((val)) \
@@ -191,12 +192,18 @@ struct bfd_echo_pkt {
191192
SET_FLAG(flags, (CHECK_FLAG(val, 0x3) << 6)); \
192193
}
193194
#define BFD_GETSTATE(flags) (CHECK_FLAG((flags >> 6), 0x3))
194-
#define BFD_SETCBIT(flags, val) \
195-
{ \
196-
if ((val)) \
197-
SET_FLAG(flags, val); \
195+
#define BFD_SETCBIT(flags, val) \
196+
{ \
197+
if ((val)) \
198+
SET_FLAG(flags, BFD_CBIT); \
198199
}
199200
#define BFD_GETCBIT(flags) (CHECK_FLAG(flags, BFD_CBIT))
201+
#define BFD_SETABIT(flags, val) \
202+
{ \
203+
if ((val)) \
204+
SET_FLAG(flags, BFD_ABIT); \
205+
}
206+
#define BFD_GETABIT(flags) (CHECK_FLAG(flags, BFD_ABIT))
200207
#define BFD_ECHO_VERSION 1
201208
#define BFD_ECHO_PKT_LEN sizeof(struct bfd_echo_pkt)
202209

@@ -357,7 +364,9 @@ struct bfd_session {
357364
struct event *echo_recvtimer_ev;
358365
struct event *recvtimer_ev;
359366
uint64_t xmt_TO;
367+
uint64_t xmt_TO_actual; /* Actual transmit timeout with jitter applied */
360368
uint64_t echo_xmt_TO;
369+
uint64_t echo_xmt_TO_actual; /* Actual echo transmit timeout with jitter applied */
361370
struct event *xmttimer_ev;
362371
struct event *echo_xmttimer_ev;
363372
uint64_t echo_detect_TO;

0 commit comments

Comments
 (0)