Skip to content

Commit 4cd90e0

Browse files
committed
p2p/discover, p2p/enode: rework endpoint proof handling, packet logging (#18963)
This change resolves multiple issues around handling of endpoint proofs. The proof is now done separately for each IP and completing the proof requires a matching ping hash. Also remove waitping because it's equivalent to sleep. waitping was slightly more efficient, but that may cause issues with findnode if packets are reordered and the remote end sees findnode before pong. Logging of received packets was hitherto done after handling the packet, which meant that sent replies were logged before the packet that generated them. This change splits up packet handling into 'preverify' and 'handle'. The error from 'preverify' is logged, but 'handle' happens after the message is logged. This fixes the order. Packet logs now contain the node ID.
1 parent 1f3dfed commit 4cd90e0

File tree

8 files changed

+595
-332
lines changed

8 files changed

+595
-332
lines changed

p2p/discover/node.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ import (
3333
// The fields of Node may not be modified.
3434
type node struct {
3535
enode.Node
36-
addedAt time.Time // time when the node was added to the table
36+
addedAt time.Time // time when the node was added to the table
37+
livenessChecks uint // how often liveness was checked
3738
}
3839

3940
type encPubkey [64]byte

p2p/discover/table.go

Lines changed: 39 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,10 @@ type Table struct {
7575
net transport
7676
refreshReq chan chan struct{}
7777
initDone chan struct{}
78-
closeReq chan struct{}
79-
closed chan struct{}
78+
79+
closeOnce sync.Once
80+
closeReq chan struct{}
81+
closed chan struct{}
8082

8183
nodeAddedHook func(*node) // for testing
8284
}
@@ -180,16 +182,14 @@ func (tab *Table) ReadRandomNodes(buf []*enode.Node) (n int) {
180182

181183
// Close terminates the network listener and flushes the node database.
182184
func (tab *Table) Close() {
183-
if tab.net != nil {
184-
tab.net.close()
185-
}
186-
187-
select {
188-
case <-tab.closed:
189-
// already closed.
190-
case tab.closeReq <- struct{}{}:
191-
<-tab.closed // wait for refreshLoop to end.
192-
}
185+
tab.closeOnce.Do(func() {
186+
if tab.net != nil {
187+
tab.net.close()
188+
}
189+
// Wait for loop to end.
190+
close(tab.closeReq)
191+
<-tab.closed
192+
})
193193
}
194194

195195
// setFallbackNodes sets the initial points of contact. These nodes
@@ -290,31 +290,39 @@ func (tab *Table) lookup(targetKey encPubkey, refreshIfEmpty bool) []*node {
290290
// we have asked all closest nodes, stop the search
291291
break
292292
}
293-
// wait for the next reply
294-
for _, n := range <-reply {
295-
if n != nil && !seen[n.ID()] {
296-
seen[n.ID()] = true
297-
result.push(n, bucketSize)
293+
select {
294+
case nodes := <-reply:
295+
for _, n := range nodes {
296+
if n != nil && !seen[n.ID()] {
297+
seen[n.ID()] = true
298+
result.push(n, bucketSize)
299+
}
298300
}
301+
case <-tab.closeReq:
302+
return nil // shutdown, no need to continue.
299303
}
300304
pendingQueries--
301305
}
302306
return result.entries
303307
}
304308

305309
func (tab *Table) findnode(n *node, targetKey encPubkey, reply chan<- []*node) {
306-
fails := tab.db.FindFails(n.ID())
310+
fails := tab.db.FindFails(n.ID(), n.IP())
307311
r, err := tab.net.findnode(n.ID(), n.addr(), targetKey)
308-
if err != nil || len(r) == 0 {
312+
if err == errClosed {
313+
// Avoid recording failures on shutdown.
314+
reply <- nil
315+
return
316+
} else if err != nil || len(r) == 0 {
309317
fails++
310-
tab.db.UpdateFindFails(n.ID(), fails)
318+
tab.db.UpdateFindFails(n.ID(), n.IP(), fails)
311319
log.Trace("Findnode failed", "id", n.ID(), "failcount", fails, "err", err)
312320
if fails >= maxFindnodeFailures {
313321
log.Trace("Too many findnode failures, dropping", "id", n.ID(), "failcount", fails)
314322
tab.delete(n)
315323
}
316324
} else if fails > 0 {
317-
tab.db.UpdateFindFails(n.ID(), fails-1)
325+
tab.db.UpdateFindFails(n.ID(), n.IP(), fails-1)
318326
}
319327

320328
// Grab as many nodes as possible. Some of them might not be alive anymore, but we'll
@@ -329,7 +337,7 @@ func (tab *Table) refresh() <-chan struct{} {
329337
done := make(chan struct{})
330338
select {
331339
case tab.refreshReq <- done:
332-
case <-tab.closed:
340+
case <-tab.closeReq:
333341
close(done)
334342
}
335343
return done
@@ -433,7 +441,7 @@ func (tab *Table) loadSeedNodes() {
433441
seeds = append(seeds, tab.nursery...)
434442
for i := range seeds {
435443
seed := seeds[i]
436-
age := log.Lazy{Fn: func() interface{} { return time.Since(tab.db.LastPongReceived(seed.ID())) }}
444+
age := log.Lazy{Fn: func() interface{} { return time.Since(tab.db.LastPongReceived(seed.ID(), seed.IP())) }}
437445
log.Trace("Found seed node in database", "id", seed.ID(), "addr", seed.addr(), "age", age)
438446
tab.add(seed)
439447
}
@@ -458,16 +466,17 @@ func (tab *Table) doRevalidate(done chan<- struct{}) {
458466
b := tab.buckets[bi]
459467
if err == nil {
460468
// The node responded, move it to the front.
461-
log.Debug("Revalidated node", "b", bi, "id", last.ID())
469+
last.livenessChecks++
470+
log.Debug("Revalidated node", "b", bi, "id", last.ID(), "checks", last.livenessChecks)
462471
b.bump(last)
463472
return
464473
}
465474
// No reply received, pick a replacement or delete the node if there aren't
466475
// any replacements.
467476
if r := tab.replace(b, last); r != nil {
468-
log.Debug("Replaced dead node", "b", bi, "id", last.ID(), "ip", last.IP(), "r", r.ID(), "rip", r.IP())
477+
log.Debug("Replaced dead node", "b", bi, "id", last.ID(), "ip", last.IP(), "checks", last.livenessChecks, "r", r.ID(), "rip", r.IP())
469478
} else {
470-
log.Debug("Removed dead node", "b", bi, "id", last.ID(), "ip", last.IP())
479+
log.Debug("Removed dead node", "b", bi, "id", last.ID(), "ip", last.IP(), "checks", last.livenessChecks)
471480
}
472481
}
473482

@@ -502,7 +511,7 @@ func (tab *Table) copyLiveNodes() {
502511
now := time.Now()
503512
for _, b := range &tab.buckets {
504513
for _, n := range b.entries {
505-
if now.Sub(n.addedAt) >= seedMinTableTime {
514+
if n.livenessChecks > 0 && now.Sub(n.addedAt) >= seedMinTableTime {
506515
tab.db.UpdateNode(unwrapNode(n))
507516
}
508517
}
@@ -518,7 +527,9 @@ func (tab *Table) closest(target enode.ID, nresults int) *nodesByDistance {
518527
close := &nodesByDistance{target: target}
519528
for _, b := range &tab.buckets {
520529
for _, n := range b.entries {
521-
close.push(n, nresults)
530+
if n.livenessChecks > 0 {
531+
close.push(n, nresults)
532+
}
522533
}
523534
}
524535
return close
@@ -572,23 +583,6 @@ func (tab *Table) addThroughPing(n *node) {
572583
tab.add(n)
573584
}
574585

575-
// stuff adds nodes the table to the end of their corresponding bucket
576-
// if the bucket is not full. The caller must not hold tab.mutex.
577-
func (tab *Table) stuff(nodes []*node) {
578-
tab.mutex.Lock()
579-
defer tab.mutex.Unlock()
580-
581-
for _, n := range nodes {
582-
if n.ID() == tab.self().ID() {
583-
continue // don't add self
584-
}
585-
b := tab.bucket(n.ID())
586-
if len(b.entries) < bucketSize {
587-
tab.bumpOrAdd(b, n)
588-
}
589-
}
590-
}
591-
592586
// delete removes an entry from the node table. It is used to evacuate dead nodes.
593587
func (tab *Table) delete(node *node) {
594588
tab.mutex.Lock()

p2p/discover/table_test.go

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ func TestTable_pingReplace(t *testing.T) {
5050
func testPingReplace(t *testing.T, newNodeIsResponding, lastInBucketIsResponding bool) {
5151
transport := newPingRecorder()
5252
tab, db := newTestTable(transport)
53-
defer tab.Close()
5453
defer db.Close()
54+
defer tab.Close()
5555

5656
<-tab.initDone
5757

@@ -137,8 +137,8 @@ func TestBucket_bumpNoDuplicates(t *testing.T) {
137137
func TestTable_IPLimit(t *testing.T) {
138138
transport := newPingRecorder()
139139
tab, db := newTestTable(transport)
140-
defer tab.Close()
141140
defer db.Close()
141+
defer tab.Close()
142142

143143
for i := 0; i < tableIPLimit+1; i++ {
144144
n := nodeAtDistance(tab.self().ID(), i, net.IP{172, 0, 1, byte(i)})
@@ -153,8 +153,8 @@ func TestTable_IPLimit(t *testing.T) {
153153
func TestTable_BucketIPLimit(t *testing.T) {
154154
transport := newPingRecorder()
155155
tab, db := newTestTable(transport)
156-
defer tab.Close()
157156
defer db.Close()
157+
defer tab.Close()
158158

159159
d := 3
160160
for i := 0; i < bucketIPLimit+1; i++ {
@@ -173,9 +173,9 @@ func TestTable_closest(t *testing.T) {
173173
// for any node table, Target and N
174174
transport := newPingRecorder()
175175
tab, db := newTestTable(transport)
176-
defer tab.Close()
177176
defer db.Close()
178-
tab.stuff(test.All)
177+
defer tab.Close()
178+
fillTable(tab, test.All)
179179

180180
// check that closest(Target, N) returns nodes
181181
result := tab.closest(test.Target, test.N).entries
@@ -234,13 +234,13 @@ func TestTable_ReadRandomNodesGetAll(t *testing.T) {
234234
test := func(buf []*enode.Node) bool {
235235
transport := newPingRecorder()
236236
tab, db := newTestTable(transport)
237-
defer tab.Close()
238237
defer db.Close()
238+
defer tab.Close()
239239
<-tab.initDone
240240

241241
for i := 0; i < len(buf); i++ {
242242
ld := cfg.Rand.Intn(len(tab.buckets))
243-
tab.stuff([]*node{nodeAtDistance(tab.self().ID(), ld, intIP(ld))})
243+
fillTable(tab, []*node{nodeAtDistance(tab.self().ID(), ld, intIP(ld))})
244244
}
245245
gotN := tab.ReadRandomNodes(buf)
246246
if gotN != tab.len() {
@@ -272,25 +272,29 @@ func (*closeTest) Generate(rand *rand.Rand, size int) reflect.Value {
272272
N: rand.Intn(bucketSize),
273273
}
274274
for _, id := range gen([]enode.ID{}, rand).([]enode.ID) {
275-
n := enode.SignNull(new(enr.Record), id)
276-
t.All = append(t.All, wrapNode(n))
275+
r := new(enr.Record)
276+
r.Set(enr.IP(genIP(rand)))
277+
n := wrapNode(enode.SignNull(r, id))
278+
n.livenessChecks = 1
279+
t.All = append(t.All, n)
277280
}
278281
return reflect.ValueOf(t)
279282
}
280283

281284
func TestTable_Lookup(t *testing.T) {
282285
tab, db := newTestTable(lookupTestnet)
283-
defer tab.Close()
284286
defer db.Close()
287+
defer tab.Close()
285288

286289
// lookup on empty table returns no nodes
287290
if results := tab.lookup(lookupTestnet.target, false); len(results) > 0 {
288291
t.Fatalf("lookup on empty table returned %d results: %#v", len(results), results)
289292
}
290293
// seed table with initial node (otherwise lookup will terminate immediately)
291294
seedKey, _ := decodePubkey(lookupTestnet.dists[256][0])
292-
seed := wrapNode(enode.NewV4(seedKey, net.IP{}, 0, 256))
293-
tab.stuff([]*node{seed})
295+
seed := wrapNode(enode.NewV4(seedKey, net.IP{127, 0, 0, 1}, 0, 256))
296+
seed.livenessChecks = 1
297+
fillTable(tab, []*node{seed})
294298

295299
results := tab.lookup(lookupTestnet.target, true)
296300
t.Logf("results:")
@@ -578,6 +582,12 @@ func gen(typ interface{}, rand *rand.Rand) interface{} {
578582
return v.Interface()
579583
}
580584

585+
func genIP(rand *rand.Rand) net.IP {
586+
ip := make(net.IP, 4)
587+
rand.Read(ip)
588+
return ip
589+
}
590+
581591
func quickcfg() *quick.Config {
582592
return &quick.Config{
583593
MaxCount: 5000,

p2p/discover/table_util_test.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,23 @@ func fillBucket(tab *Table, n *node) (last *node) {
8383
return b.entries[bucketSize-1]
8484
}
8585

86+
// fillTable adds nodes the table to the end of their corresponding bucket
87+
// if the bucket is not full. The caller must not hold tab.mutex.
88+
func fillTable(tab *Table, nodes []*node) {
89+
tab.mutex.Lock()
90+
defer tab.mutex.Unlock()
91+
92+
for _, n := range nodes {
93+
if n.ID() == tab.self().ID() {
94+
continue // don't add self
95+
}
96+
b := tab.bucket(n.ID())
97+
if len(b.entries) < bucketSize {
98+
tab.bumpOrAdd(b, n)
99+
}
100+
}
101+
}
102+
86103
type pingRecorder struct {
87104
mu sync.Mutex
88105
dead, pinged map[enode.ID]bool
@@ -109,10 +126,6 @@ func (t *pingRecorder) findnode(toid enode.ID, toaddr *net.UDPAddr, target encPu
109126
return nil, nil
110127
}
111128

112-
func (t *pingRecorder) waitping(from enode.ID) error {
113-
return nil // remote always pings
114-
}
115-
116129
func (t *pingRecorder) ping(toid enode.ID, toaddr *net.UDPAddr) error {
117130
t.mu.Lock()
118131
defer t.mu.Unlock()

0 commit comments

Comments
 (0)