Commit 9be7169

Merge pull request #628 from libp2p/test/fix-hung-test

fix all flaky tests

2 parents a7fb794 + 297911c, commit 9be7169

5 files changed: +123 −77 lines changed

dht_test.go

Lines changed: 52 additions & 43 deletions
@@ -32,7 +32,6 @@ import (
 	kb "github.com/libp2p/go-libp2p-kbucket"
 	record "github.com/libp2p/go-libp2p-record"
 	swarmt "github.com/libp2p/go-libp2p-swarm/testing"
-	"github.com/libp2p/go-libp2p-testing/ci"
 	bhost "github.com/libp2p/go-libp2p/p2p/host/basic"
 	ma "github.com/multiformats/go-multiaddr"
 
@@ -129,7 +128,7 @@ func setupDHT(ctx context.Context, t *testing.T, client bool, options ...Option)
 	return d
 }
 
-func setupDHTS(t *testing.T, ctx context.Context, n int) []*IpfsDHT {
+func setupDHTS(t *testing.T, ctx context.Context, n int, options ...Option) []*IpfsDHT {
 	addrs := make([]ma.Multiaddr, n)
 	dhts := make([]*IpfsDHT, n)
 	peers := make([]peer.ID, n)
@@ -138,7 +137,7 @@ func setupDHTS(t *testing.T, ctx context.Context, n int) []*IpfsDHT {
 	sanityPeersMap := make(map[string]struct{})
 
 	for i := 0; i < n; i++ {
-		dhts[i] = setupDHT(ctx, t, false)
+		dhts[i] = setupDHT(ctx, t, false, options...)
 		peers[i] = dhts[i].PeerID()
 		addrs[i] = dhts[i].host.Addrs()[0]
 
@@ -673,8 +672,9 @@ func TestLocalProvides(t *testing.T) {
 }
 
 // if minPeers or avgPeers is 0, dont test for it.
-func waitForWellFormedTables(t *testing.T, dhts []*IpfsDHT, minPeers, avgPeers int, timeout time.Duration) bool {
+func waitForWellFormedTables(t *testing.T, dhts []*IpfsDHT, minPeers, avgPeers int, timeout time.Duration) {
 	// test "well-formed-ness" (>= minPeers peers in every routing table)
+	t.Helper()
 
 	checkTables := func() bool {
 		totalPeers := 0
@@ -699,11 +699,12 @@ func waitForWellFormedTables(t *testing.T, dhts []*IpfsDHT, minPeers, avgPeers i
 	for {
 		select {
 		case <-timeoutA:
-			logger.Debugf("did not reach well-formed routing tables by %s", timeout)
-			return false // failed
+			t.Errorf("failed to reach well-formed routing tables after %s", timeout)
+			return
 		case <-time.After(5 * time.Millisecond):
 			if checkTables() {
-				return true // succeeded
+				// succeeded
+				return
 			}
 		}
 	}
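Not part of the diff: the hunk above turns waitForWellFormedTables from a bool-returning function into a helper that fails the test on its own. A minimal sketch of that pattern, using a hypothetical waitForCondition helper (the names and the 5ms poll interval come from the hunk; everything else is illustrative):

package example

import (
	"testing"
	"time"
)

// waitForCondition polls check until it returns true or timeout elapses.
// t.Helper() makes a failure point at the caller's line, and reporting
// through t.Errorf frees callers from checking a boolean result.
func waitForCondition(t *testing.T, check func() bool, timeout time.Duration) {
	t.Helper()

	deadline := time.After(timeout)
	for {
		select {
		case <-deadline:
			t.Errorf("condition not reached after %s", timeout)
			return
		case <-time.After(5 * time.Millisecond):
			if check() {
				return
			}
		}
	}
}

This is also why the require.True wrappers disappear in the TestHandleRemotePeerProtocolChanges hunk further down: the helper now reports failure itself.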
@@ -760,6 +761,7 @@ func TestRefresh(t *testing.T) {
 	}
 
 	waitForWellFormedTables(t, dhts, 7, 10, 10*time.Second)
+
 	cancelT()
 
 	if u.Debug {
@@ -830,12 +832,12 @@ func TestRefreshBelowMinRTThreshold(t *testing.T) {
 }
 
 func TestPeriodicRefresh(t *testing.T) {
-	if ci.IsRunning() {
-		t.Skip("skipping on CI. highly timing dependent")
-	}
 	if testing.Short() {
 		t.SkipNow()
 	}
+	if detectrace.WithRace() {
+		t.Skip("skipping due to race detector max goroutines")
+	}
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
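Not part of the diff: detectrace.WithRace() reports whether the test binary was built with -race; the matching import hunk is not shown in this excerpt, but the helper presumably comes from the go-libp2p-testing module. The standard way to implement such a probe is a pair of build-tagged files, sketched here under a hypothetical detectrace package:

//go:build race

package detectrace

// WithRace reports true when compiled with the race detector.
func WithRace() bool { return true }

and the default build:

//go:build !race

package detectrace

// WithRace reports false in a normal build.
func WithRace() bool { return false }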
@@ -894,7 +896,9 @@ func TestPeriodicRefresh(t *testing.T) {
 }
 
 func TestProvidesMany(t *testing.T) {
-	t.Skip("this test doesn't work")
+	if detectrace.WithRace() {
+		t.Skip("skipping due to race detector max goroutines")
+	}
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
@@ -1149,9 +1153,6 @@ func TestConnectCollision(t *testing.T) {
 	if testing.Short() {
 		t.SkipNow()
 	}
-	if ci.IsRunning() {
-		t.Skip("Skipping on CI.")
-	}
 
 	runTimes := 10
 
@@ -1337,7 +1338,7 @@ func minInt(a, b int) int {
 }
 
 func TestFindPeerQueryMinimal(t *testing.T) {
-	testFindPeerQuery(t, 2, 22, 11)
+	testFindPeerQuery(t, 2, 22, 1)
 }
 
 func TestFindPeerQuery(t *testing.T) {
@@ -1348,62 +1349,70 @@ func TestFindPeerQuery(t *testing.T) {
 	if testing.Short() {
 		t.Skip("skipping test in short mode")
 	}
-	if curFileLimit() < 1024 {
-		t.Skip("insufficient file descriptors available")
-	}
-	testFindPeerQuery(t, 20, 80, 16)
+	testFindPeerQuery(t, 5, 40, 3)
 }
 
 // NOTE: You must have ATLEAST (minRTRefreshThreshold+1) test peers before using this.
 func testFindPeerQuery(t *testing.T,
 	bootstrappers, // Number of nodes connected to the querying node
 	leafs, // Number of nodes that might be connected to from the bootstrappers
-	bootstrapperLeafConns int, // Number of connections each bootstrapper has to the leaf nodes
+	bootstrapConns int, // Number of bootstrappers each leaf should connect to.
 ) {
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
-	dhts := setupDHTS(t, ctx, 1+bootstrappers+leafs)
+	dhts := setupDHTS(t, ctx, 1+bootstrappers+leafs, BucketSize(4))
 	defer func() {
 		for _, d := range dhts {
 			d.Close()
 			d.host.Close()
 		}
 	}()
 
+	t.Log("connecting")
+
 	mrand := rand.New(rand.NewSource(42))
 	guy := dhts[0]
 	others := dhts[1:]
-	for i := 0; i < bootstrappers; i++ {
-		for j := 0; j < bootstrapperLeafConns; j++ {
-			v := mrand.Intn(leafs)
-			connectNoSync(t, ctx, others[i], others[bootstrappers+v])
+	for i := 0; i < leafs; i++ {
+		for _, v := range mrand.Perm(bootstrappers)[:bootstrapConns] {
+			connectNoSync(t, ctx, others[v], others[bootstrappers+i])
 		}
 	}
 
 	for i := 0; i < bootstrappers; i++ {
 		connectNoSync(t, ctx, guy, others[i])
 	}
 
+	t.Log("waiting for routing tables")
+
 	// give some time for things to settle down
-	waitForWellFormedTables(t, dhts, minRTRefreshThreshold, minRTRefreshThreshold, 5*time.Second)
+	waitForWellFormedTables(t, dhts, bootstrapConns, bootstrapConns, 5*time.Second)
 
-	for _, d := range dhts {
-		if len(d.RoutingTable().ListPeers()) > 0 {
-			if err := <-d.RefreshRoutingTable(); err != nil {
-				t.Fatal(err)
-			}
-		}
+	t.Log("refreshing")
+
+	var wg sync.WaitGroup
+	for _, dht := range dhts {
+		wg.Add(1)
+		go func(d *IpfsDHT) {
+			<-d.RefreshRoutingTable()
+			wg.Done()
+		}(dht)
 	}
 
-	var reachableIds []peer.ID
-	for i, d := range dhts {
-		lp := len(d.host.Network().Peers())
-		if i != 0 && lp > 0 {
-			reachableIds = append(reachableIds, d.PeerID())
-		}
+	wg.Wait()
+
+	t.Log("waiting for routing tables again")
+
+	// Wait for refresh to work. At least one bucket should be full.
+	waitForWellFormedTables(t, dhts, 4, 0, 5*time.Second)
+
+	var peers []peer.ID
+	for _, d := range others {
+		peers = append(peers, d.PeerID())
 	}
-	t.Logf("%d reachable ids", len(reachableIds))
+
+	t.Log("querying")
 
 	val := "foobar"
 	rtval := kb.ConvertKey(val)
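Not part of the diff: the refresh step above starts one goroutine per node, each blocking on the channel returned by RefreshRoutingTable, and joins them with a sync.WaitGroup. The same fan-out/join shape in isolation, with a hypothetical refreshAll helper over generic refresh functions:

package example

import "sync"

// refreshAll starts every refresh concurrently and blocks until all of
// them complete. Each refresh function stands in for a call like
// (*IpfsDHT).RefreshRoutingTable, which returns a channel that yields
// the result once the refresh finishes.
func refreshAll(refreshers []func() <-chan error) {
	var wg sync.WaitGroup
	for _, refresh := range refreshers {
		wg.Add(1)
		go func(refresh func() <-chan error) {
			defer wg.Done()
			<-refresh() // block until this node's refresh completes
		}(refresh)
	}
	wg.Wait()
}

Passing the loop variable as a goroutine argument mirrors the go func(d *IpfsDHT){...}(dht) form in the hunk and sidesteps the pre-Go 1.22 loop-capture pitfall.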
@@ -1418,7 +1427,7 @@ func testFindPeerQuery(t *testing.T,
 
 	sort.Sort(peer.IDSlice(outpeers))
 
-	exp := kb.SortClosestPeers(reachableIds, rtval)[:minInt(guy.bucketSize, len(reachableIds))]
+	exp := kb.SortClosestPeers(peers, rtval)[:minInt(guy.bucketSize, len(peers))]
 	t.Logf("got %d peers", len(outpeers))
 	got := kb.SortClosestPeers(outpeers, rtval)
 
@@ -1588,19 +1597,19 @@ func TestHandleRemotePeerProtocolChanges(t *testing.T) {
 	connect(t, ctx, dhtA, dhtB)
 
 	// now assert both have each other in their RT
-	require.True(t, waitForWellFormedTables(t, []*IpfsDHT{dhtA, dhtB}, 1, 1, 10*time.Second), "both RT should have one peer each")
+	waitForWellFormedTables(t, []*IpfsDHT{dhtA, dhtB}, 1, 1, 10*time.Second)
 
 	// dhtB becomes a client
 	require.NoError(t, dhtB.setMode(modeClient))
 
 	// which means that dhtA should evict it from it's RT
-	require.True(t, waitForWellFormedTables(t, []*IpfsDHT{dhtA}, 0, 0, 10*time.Second), "dHTA routing table should have 0 peers")
+	waitForWellFormedTables(t, []*IpfsDHT{dhtA}, 0, 0, 10*time.Second)
 
 	// dhtB becomes a server
 	require.NoError(t, dhtB.setMode(modeServer))
 
 	// which means dhtA should have it in the RT again because of fixLowPeers
-	require.True(t, waitForWellFormedTables(t, []*IpfsDHT{dhtA}, 1, 1, 10*time.Second), "dHTA routing table should have 1 peers")
+	waitForWellFormedTables(t, []*IpfsDHT{dhtA}, 1, 1, 10*time.Second)
 }
 
 func TestGetSetPluggedProtocol(t *testing.T) {
ext_test.go

Lines changed: 48 additions & 17 deletions
@@ -20,20 +20,21 @@ import (
 
 	ggio "github.com/gogo/protobuf/io"
 	u "github.com/ipfs/go-ipfs-util"
-	"github.com/stretchr/testify/require"
 )
 
 // Test that one hung request to a peer doesn't prevent another request
 // using that same peer from obeying its context.
 func TestHungRequest(t *testing.T) {
-	ctx := context.Background()
-	mn, err := mocknet.FullMeshConnected(ctx, 2)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	mn, err := mocknet.FullMeshLinked(ctx, 2)
 	if err != nil {
 		t.Fatal(err)
 	}
 	hosts := mn.Hosts()
 
-	os := []Option{testPrefix, DisableAutoRefresh()}
+	os := []Option{testPrefix, DisableAutoRefresh(), Mode(ModeServer)}
 	d, err := New(ctx, hosts[0], os...)
 	if err != nil {
 		t.Fatal(err)
@@ -46,8 +47,18 @@ func TestHungRequest(t *testing.T) {
 		})
 	}
 
-	require.NoError(t, hosts[0].Peerstore().AddProtocols(hosts[1].ID(), protocol.ConvertToStrings(d.serverProtocols)...))
-	d.peerFound(ctx, hosts[1].ID(), true)
+	err = mn.ConnectAllButSelf()
+	if err != nil {
+		t.Fatal("failed to connect peers", err)
+	}
+
+	// Wait at a bit for a peer in our routing table.
+	for i := 0; i < 100 && d.routingTable.Size() == 0; i++ {
+		time.Sleep(10 * time.Millisecond)
+	}
+	if d.routingTable.Size() == 0 {
+		t.Fatal("failed to fill routing table")
+	}
 
 	ctx1, cancel1 := context.WithTimeout(ctx, 1*time.Second)
 	defer cancel1()
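Not part of the diff: the 100 × 10ms loop above is a bounded poll that fails hard after roughly one second, instead of sleeping a fixed amount and hoping. The same idiom factored into a hypothetical helper:

package example

import (
	"testing"
	"time"
)

// requireEventually polls cond for up to 100 iterations of 10ms and
// fails the test if it never becomes true, mirroring the wait for the
// routing table to fill after mn.ConnectAllButSelf().
func requireEventually(t *testing.T, cond func() bool, msg string) {
	t.Helper()
	for i := 0; i < 100 && !cond(); i++ {
		time.Sleep(10 * time.Millisecond)
	}
	if !cond() {
		t.Fatal(msg)
	}
}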
@@ -66,15 +77,19 @@ func TestHungRequest(t *testing.T) {
 		t.Errorf("expected to fail with deadline exceeded, got: %s", ctx2.Err())
 	}
 	select {
-	case <-done:
-		t.Errorf("GetClosestPeers should not have returned yet")
+	case err = <-done:
+		t.Error("GetClosestPeers should not have returned yet", err)
 	default:
 		err = <-done
 		if err != context.DeadlineExceeded {
 			t.Errorf("expected the deadline to be exceeded, got %s", err)
 		}
 	}
 
+	if d.routingTable.Size() == 0 {
+		// make sure we didn't just disconnect
+		t.Fatal("expected peers in the routing table")
+	}
 }
 
 func TestGetFailures(t *testing.T) {
@@ -202,6 +217,11 @@ func TestGetFailures(t *testing.T) {
 			t.Fatal("shouldnt have provider peers")
 		}
 	}
+
+	if d.routingTable.Size() == 0 {
+		// make sure we didn't just disconnect
+		t.Fatal("expected peers in the routing table")
+	}
 }
 
 func TestNotFound(t *testing.T) {
@@ -217,16 +237,12 @@ func TestNotFound(t *testing.T) {
 	}
 	hosts := mn.Hosts()
 
-	os := []Option{testPrefix, DisableAutoRefresh()}
+	os := []Option{testPrefix, DisableAutoRefresh(), Mode(ModeServer)}
 	d, err := New(ctx, hosts[0], os...)
 	if err != nil {
 		t.Fatal(err)
 	}
 
-	for _, p := range hosts {
-		d.peerFound(ctx, p.ID(), true)
-	}
-
 	// Reply with random peers to every message
 	for _, host := range hosts {
 		host := host // shadow loop var
@@ -239,7 +255,8 @@ func TestNotFound(t *testing.T) {
 
 			pmes := new(pb.Message)
 			if err := pbr.ReadMsg(pmes); err != nil {
-				panic(err)
+				// this isn't an error, it just means the stream has died.
+				return
 			}
 
 			switch pmes.GetType() {
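Not part of the diff: the hunk above stops panicking when a mock stream read fails, since with real connection management a stream can die mid-test without that being a bug. A sketch of such a tolerant handler, reusing the ggio, pb, and network identifiers that appear elsewhere in this file (the import paths are assumptions based on the era of this commit):

package example

import (
	ggio "github.com/gogo/protobuf/io"
	"github.com/libp2p/go-libp2p-core/network"

	pb "github.com/libp2p/go-libp2p-kad-dht/pb"
)

// tolerantHandler reads one DHT message and treats a failed read as a
// dead stream rather than a test failure: it returns instead of
// panicking, which is exactly the change made above.
func tolerantHandler(s network.Stream) {
	defer s.Close()

	pbr := ggio.NewDelimitedReader(s, network.MessageSizeMax)
	pmes := new(pb.Message)
	if err := pbr.ReadMsg(pmes); err != nil {
		// Not a test error: the stream has died.
		return
	}
	// ... build and write a response here ...
}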
@@ -255,13 +272,23 @@ func TestNotFound(t *testing.T) {
 
 			resp.CloserPeers = pb.PeerInfosToPBPeers(d.host.Network(), ps)
 			if err := pbw.WriteMsg(resp); err != nil {
-				panic(err)
+				return
 			}
 		default:
 			panic("Shouldnt recieve this.")
 		}
 	})
 	}
+		for _, peer := range hosts {
+			if host == peer {
+				continue
+			}
+			_ = peer.Peerstore().AddProtocols(host.ID(), protocol.ConvertToStrings(d.serverProtocols)...)
+		}
+	}
+
+	for _, p := range hosts {
+		d.peerFound(ctx, p.ID(), true)
 	}
 
 	// long timeout to ensure timing is not at play.
@@ -275,6 +302,10 @@ func TestNotFound(t *testing.T) {
 	}
 	switch err {
 	case routing.ErrNotFound:
+		if d.routingTable.Size() == 0 {
+			// make sure we didn't just disconnect
+			t.Fatal("expected peers in the routing table")
+		}
 		//Success!
 		return
 	case u.ErrTimeout:
@@ -299,7 +330,7 @@ func TestLessThanKResponses(t *testing.T) {
 	}
 	hosts := mn.Hosts()
 
-	os := []Option{testPrefix, DisableAutoRefresh()}
+	os := []Option{testPrefix, DisableAutoRefresh(), Mode(ModeServer)}
 	d, err := New(ctx, hosts[0], os...)
 	if err != nil {
 		t.Fatal(err)
@@ -371,7 +402,7 @@ func TestMultipleQueries(t *testing.T) {
 		t.Fatal(err)
 	}
 	hosts := mn.Hosts()
-	os := []Option{testPrefix, DisableAutoRefresh()}
+	os := []Option{testPrefix, DisableAutoRefresh(), Mode(ModeServer)}
 	d, err := New(ctx, hosts[0], os...)
 	if err != nil {
 		t.Fatal(err)
