Skip to content

Commit 5552def

Browse files
authored
Fix and improve a flaky test (#3275)
## Summary Fixed inconsistent test duration and occational `panic timeout` in Test_PeerConnection_RTX_E2E ``` panic: timeout goroutine 4309 [running]: github.com/pion/webrtc/v4.Test_PeerConnection_RTX_E2E.TimeOut.func5() /Users/ytakeda/go/pkg/mod/github.com/pion/transport/[email protected]/test/util.go:27 +0xb4 created by time.goFunc /Users/ytakeda/sdk/go1.25.4/src/time/sleep.go:215 +0x44 ``` ### Problem The test was timing out intermittently (0.2s-18.6s variance) when run with the race detector due to: * Random 20% packet loss causing unpredictable RTX detection timing * Missing goroutine cleanup causing hangs * Race detector overhead amplifying timing issues ``` go test -v -race -run Test_PeerConnection_RTX_E2E -count 20 === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (7.46s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (1.42s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.72s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (18.62s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (1.22s) : ``` ### Solution 1. Deterministic packet dropping - Changed from random 20% loss to dropping every 5th packet, ensuring predictable NACK/RTX triggering 2. Dynamic payload type detection - Uses negotiated payload type instead of hardcoded 96, making the test robust to codec configuration changes 3. Proper goroutine cleanup - Added context-based cleanup for RTCP reader and OnTrack callback 4. Fail-fast validation - Checks assertion return values and exits immediately on failure 5. Correct cleanup order - Closes peer connections before stopping the virtual network ### Result * Consistent ~0.21s execution time (tested 50 runs with race detector) * No more timeouts * Still validates NACK/RTX behavior as intended by PR #2919 ``` go test -v -race -run Test_PeerConnection_RTX_E2E -count 10 === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) === RUN Test_PeerConnection_RTX_E2E --- PASS: Test_PeerConnection_RTX_E2E (0.21s) PASS ok github.com/pion/webrtc/v4 3.373s ```
1 parent 418e18c commit 5552def

File tree

1 file changed

+93
-18
lines changed

1 file changed

+93
-18
lines changed

peerconnection_media_test.go

Lines changed: 93 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"errors"
1414
"fmt"
1515
"io"
16-
"math/rand"
1716
"regexp"
1817
"strings"
1918
"sync"
@@ -2353,21 +2352,54 @@ func Test_PeerConnection_RTX_E2E(t *testing.T) { //nolint:cyclop
23532352

23542353
pcOffer, pcAnswer, wan := createVNetPair(t, nil)
23552354

2356-
wan.AddChunkFilter(func(vnet.Chunk) bool {
2357-
return rand.Intn(5) != 4 //nolint: gosec
2358-
})
2359-
23602355
track, err := NewTrackLocalStaticSample(RTPCodecCapability{MimeType: MimeTypeVP8}, "track-id", "stream-id")
23612356
assert.NoError(t, err)
23622357

23632358
rtpSender, err := pcOffer.AddTrack(track)
23642359
assert.NoError(t, err)
23652360

2361+
// Signal pair first to negotiate codecs
2362+
assert.NoError(t, signalPair(pcOffer, pcAnswer))
2363+
2364+
// Get the negotiated payload type for the media codec
2365+
mediaPayloadType := uint8(rtpSender.GetParameters().Codecs[0].PayloadType)
2366+
2367+
// Use deterministic packet dropping: drop every 5th packet (20% loss)
2368+
// This is more realistic and provides faster, more consistent test results
2369+
var packetCount atomic.Uint32
2370+
wan.AddChunkFilter(func(c vnet.Chunk) bool {
2371+
// Only filter RTP packets (not RTCP, STUN, etc)
2372+
h := &rtp.Header{}
2373+
if _, err := h.Unmarshal(c.UserData()); err != nil {
2374+
return true // Not an RTP packet, let it through
2375+
}
2376+
2377+
// Drop every 5th media packet to trigger NACK/RTX
2378+
if h.PayloadType == mediaPayloadType {
2379+
count := packetCount.Add(1)
2380+
if count%5 == 0 {
2381+
return false // Drop this packet
2382+
}
2383+
}
2384+
2385+
return true
2386+
})
2387+
2388+
// Create context for coordinated cleanup
2389+
testCtx, testCancel := context.WithCancel(context.Background())
2390+
defer testCancel()
2391+
2392+
// RTCP reader with proper cleanup
23662393
go func() {
23672394
rtcpBuf := make([]byte, 1500)
23682395
for {
2369-
if _, _, rtcpErr := rtpSender.Read(rtcpBuf); rtcpErr != nil {
2396+
select {
2397+
case <-testCtx.Done():
23702398
return
2399+
default:
2400+
if _, _, rtcpErr := rtpSender.Read(rtcpBuf); rtcpErr != nil {
2401+
return
2402+
}
23712403
}
23722404
}
23732405
}()
@@ -2376,45 +2408,88 @@ func Test_PeerConnection_RTX_E2E(t *testing.T) { //nolint:cyclop
23762408
ssrc := rtpSender.GetParameters().Encodings[0].SSRC
23772409

23782410
rtxRead, rtxReadCancel := context.WithCancel(context.Background())
2411+
defer rtxReadCancel() // Ensure cleanup even if RTX is never detected
2412+
2413+
// Track whether we've seen RTX
2414+
var rtxDetected atomic.Bool
2415+
2416+
// OnTrack with proper cleanup
23792417
pcAnswer.OnTrack(func(track *TrackRemote, _ *RTPReceiver) {
23802418
for {
2419+
select {
2420+
case <-testCtx.Done():
2421+
return
2422+
default:
2423+
}
2424+
23812425
pkt, attributes, readRTPErr := track.ReadRTP()
2382-
if errors.Is(readRTPErr, io.EOF) {
2426+
if readRTPErr != nil {
23832427
return
2384-
} else if pkt.PayloadType == 0 {
2385-
continue
23862428
}
23872429

2388-
assert.NotNil(t, pkt)
2389-
assert.Equal(t, pkt.SSRC, uint32(ssrc))
2390-
assert.Equal(t, pkt.PayloadType, uint8(96))
2430+
// Validate packet - fail fast if unexpected
2431+
if !assert.NotNil(t, pkt) {
2432+
return
2433+
}
2434+
if !assert.Equal(t, uint32(ssrc), pkt.SSRC, "Unexpected SSRC") {
2435+
return
2436+
}
2437+
if !assert.Equal(t, mediaPayloadType, pkt.PayloadType, "Unexpected payload type") {
2438+
return
2439+
}
23912440

2441+
// Check if this is an RTX retransmission
23922442
rtxPayloadType := attributes.Get(AttributeRtxPayloadType)
23932443
rtxSequenceNumber := attributes.Get(AttributeRtxSequenceNumber)
23942444
rtxSSRC := attributes.Get(AttributeRtxSsrc)
23952445
if rtxPayloadType != nil && rtxSequenceNumber != nil && rtxSSRC != nil {
2396-
assert.Equal(t, rtxPayloadType, uint8(97))
2397-
assert.Equal(t, rtxSSRC, uint32(rtxSsrc))
2446+
// Validate RTX attributes
2447+
if !assert.Equal(t, uint8(97), rtxPayloadType, "Unexpected RTX payload type") {
2448+
return
2449+
}
2450+
if !assert.Equal(t, uint32(rtxSsrc), rtxSSRC, "Unexpected RTX SSRC") {
2451+
return
2452+
}
2453+
2454+
// RTX detected successfully
2455+
if rtxDetected.CompareAndSwap(false, true) {
2456+
rtxReadCancel()
23982457

2399-
rtxReadCancel()
2458+
return
2459+
}
24002460
}
24012461
}
24022462
})
24032463

2404-
assert.NoError(t, signalPair(pcOffer, pcAnswer))
2464+
// Send packets until RTX is detected or timeout
2465+
// With 20% loss, we should see RTX within a few seconds
2466+
rtxTimeout := time.NewTimer(10 * time.Second)
2467+
defer rtxTimeout.Stop()
24052468

24062469
func() {
2470+
ticker := time.NewTicker(20 * time.Millisecond)
2471+
defer ticker.Stop()
2472+
24072473
for {
24082474
select {
2409-
case <-time.After(20 * time.Millisecond):
2475+
case <-ticker.C:
24102476
writeErr := track.WriteSample(media.Sample{Data: []byte{0x00}, Duration: time.Second})
24112477
assert.NoError(t, writeErr)
24122478
case <-rtxRead.Done():
2479+
2480+
return
2481+
case <-rtxTimeout.C:
2482+
assert.Fail(t, "RTX packet not detected within timeout - NACK/RTX mechanism may not be working")
2483+
24132484
return
24142485
}
24152486
}
24162487
}()
24172488

2418-
assert.NoError(t, wan.Stop())
2489+
// Verify RTX was actually detected
2490+
assert.True(t, rtxDetected.Load(), "RTX packet should have been detected")
2491+
2492+
// Close peer connections before stopping the network
24192493
closePairNow(t, pcOffer, pcAnswer)
2494+
assert.NoError(t, wan.Stop())
24202495
}

0 commit comments

Comments
 (0)