Skip to content

Commit 2adcc31

Browse files
committed
p2p/discover: new distance metric based on sha3(id)
The previous metric was pubkey1^pubkey2, as specified in the Kademlia paper. We missed that EC public keys are not uniformly distributed. Using the hash of the public keys addresses that. It also makes it a bit harder to generate node IDs that are close to a particular node.
1 parent d457a11 commit 2adcc31

File tree

6 files changed

+354
-106
lines changed

6 files changed

+354
-106
lines changed

p2p/discover/node.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ func recoverNodeID(hash, sig []byte) (id NodeID, err error) {
219219
// distcmp compares the distances a->target and b->target.
220220
// Returns -1 if a is closer to target, 1 if b is closer to target
221221
// and 0 if they are equal.
222-
func distcmp(target, a, b NodeID) int {
222+
func distcmp(target, a, b common.Hash) int {
223223
for i := range target {
224224
da := a[i] ^ target[i]
225225
db := b[i] ^ target[i]
@@ -269,7 +269,7 @@ var lzcount = [256]int{
269269
}
270270

271271
// logdist returns the logarithmic distance between a and b, log2(a ^ b).
272-
func logdist(a, b NodeID) int {
272+
func logdist(a, b common.Hash) int {
273273
lz := 0
274274
for i := range a {
275275
x := a[i] ^ b[i]
@@ -283,8 +283,8 @@ func logdist(a, b NodeID) int {
283283
return len(a)*8 - lz
284284
}
285285

286-
// randomID returns a random NodeID such that logdist(a, b) == n
287-
func randomID(a NodeID, n int) (b NodeID) {
286+
// hashAtDistance returns a random hash such that logdist(a, b) == n
287+
func hashAtDistance(a common.Hash, n int) (b common.Hash) {
288288
if n == 0 {
289289
return a
290290
}

p2p/discover/node_test.go

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"testing/quick"
1010
"time"
1111

12+
"github.com/ethereum/go-ethereum/common"
1213
"github.com/ethereum/go-ethereum/crypto"
1314
)
1415

@@ -169,7 +170,7 @@ func TestNodeID_pubkeyBad(t *testing.T) {
169170
}
170171

171172
func TestNodeID_distcmp(t *testing.T) {
172-
distcmpBig := func(target, a, b NodeID) int {
173+
distcmpBig := func(target, a, b common.Hash) int {
173174
tbig := new(big.Int).SetBytes(target[:])
174175
abig := new(big.Int).SetBytes(a[:])
175176
bbig := new(big.Int).SetBytes(b[:])
@@ -182,15 +183,15 @@ func TestNodeID_distcmp(t *testing.T) {
182183

183184
// the random test is likely to miss the case where they're equal.
184185
func TestNodeID_distcmpEqual(t *testing.T) {
185-
base := NodeID{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
186-
x := NodeID{15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}
186+
base := common.Hash{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
187+
x := common.Hash{15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}
187188
if distcmp(base, x, x) != 0 {
188189
t.Errorf("distcmp(base, x, x) != 0")
189190
}
190191
}
191192

192193
func TestNodeID_logdist(t *testing.T) {
193-
logdistBig := func(a, b NodeID) int {
194+
logdistBig := func(a, b common.Hash) int {
194195
abig, bbig := new(big.Int).SetBytes(a[:]), new(big.Int).SetBytes(b[:])
195196
return new(big.Int).Xor(abig, bbig).BitLen()
196197
}
@@ -201,19 +202,19 @@ func TestNodeID_logdist(t *testing.T) {
201202

202203
// the random test is likely to miss the case where they're equal.
203204
func TestNodeID_logdistEqual(t *testing.T) {
204-
x := NodeID{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
205+
x := common.Hash{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
205206
if logdist(x, x) != 0 {
206207
t.Errorf("logdist(x, x) != 0")
207208
}
208209
}
209210

210-
func TestNodeID_randomID(t *testing.T) {
211+
func TestNodeID_hashAtDistance(t *testing.T) {
211212
// we don't use quick.Check here because its output isn't
212213
// very helpful when the test fails.
213214
for i := 0; i < quickcfg.MaxCount; i++ {
214-
a := gen(NodeID{}, quickrand).(NodeID)
215-
dist := quickrand.Intn(len(NodeID{}) * 8)
216-
result := randomID(a, dist)
215+
a := gen(common.Hash{}, quickrand).(common.Hash)
216+
dist := quickrand.Intn(len(common.Hash{}) * 8)
217+
result := hashAtDistance(a, dist)
217218
actualdist := logdist(result, a)
218219

219220
if dist != actualdist {
@@ -224,6 +225,9 @@ func TestNodeID_randomID(t *testing.T) {
224225
}
225226
}
226227

228+
// TODO: this can be dropped when we require Go >= 1.5
229+
// because testing/quick learned to generate arrays in 1.5.
230+
227231
func (NodeID) Generate(rand *rand.Rand, size int) reflect.Value {
228232
var id NodeID
229233
m := rand.Intn(len(id))

p2p/discover/table.go

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,24 @@
77
package discover
88

99
import (
10+
"crypto/rand"
1011
"net"
1112
"sort"
1213
"sync"
1314
"time"
1415

16+
"github.com/ethereum/go-ethereum/common"
1517
"github.com/ethereum/go-ethereum/crypto"
1618
"github.com/ethereum/go-ethereum/logger"
1719
"github.com/ethereum/go-ethereum/logger/glog"
1820
)
1921

2022
const (
21-
alpha = 3 // Kademlia concurrency factor
22-
bucketSize = 16 // Kademlia bucket size
23-
nBuckets = nodeIDBits + 1 // Number of buckets
23+
alpha = 3 // Kademlia concurrency factor
24+
bucketSize = 16 // Kademlia bucket size
25+
hashBits = len(common.Hash{}) * 8
26+
nBuckets = hashBits + 1 // Number of buckets
27+
2428
maxBondingPingPongs = 10
2529
)
2630

@@ -116,21 +120,23 @@ func (tab *Table) Bootstrap(nodes []*Node) {
116120
// Lookup performs a network search for nodes close
117121
// to the given target. It approaches the target by querying
118122
// nodes that are closer to it on each iteration.
119-
func (tab *Table) Lookup(target NodeID) []*Node {
123+
// The given target does not need to be an actual node
124+
// identifier.
125+
func (tab *Table) Lookup(targetID NodeID) []*Node {
120126
var (
127+
target = crypto.Sha3Hash(targetID[:])
121128
asked = make(map[NodeID]bool)
122129
seen = make(map[NodeID]bool)
123130
reply = make(chan []*Node, alpha)
124131
pendingQueries = 0
125132
)
126-
// don't query further if we hit the target or ourself.
133+
// don't query further if we hit ourself.
127134
// unlikely to happen often in practice.
128-
asked[target] = true
129135
asked[tab.self.ID] = true
130136

131137
tab.mutex.Lock()
132138
// update last lookup stamp (for refresh logic)
133-
tab.buckets[logdist(tab.self.ID, target)].lastLookup = time.Now()
139+
tab.buckets[logdist(tab.self.sha, target)].lastLookup = time.Now()
134140
// generate initial result set
135141
result := tab.closest(target, bucketSize)
136142
tab.mutex.Unlock()
@@ -143,7 +149,7 @@ func (tab *Table) Lookup(target NodeID) []*Node {
143149
asked[n.ID] = true
144150
pendingQueries++
145151
go func() {
146-
r, _ := tab.net.findnode(n.ID, n.addr(), target)
152+
r, _ := tab.net.findnode(n.ID, n.addr(), targetID)
147153
reply <- tab.bondall(r)
148154
}()
149155
}
@@ -166,17 +172,16 @@ func (tab *Table) Lookup(target NodeID) []*Node {
166172

167173
// refresh performs a lookup for a random target to keep buckets full.
168174
func (tab *Table) refresh() {
169-
ld := -1 // logdist of chosen bucket
170-
tab.mutex.Lock()
171-
for i, b := range tab.buckets {
172-
if i > 0 && b.lastLookup.Before(time.Now().Add(-1*time.Hour)) {
173-
ld = i
174-
break
175-
}
176-
}
177-
tab.mutex.Unlock()
178-
179-
result := tab.Lookup(randomID(tab.self.ID, ld))
175+
// The Kademlia paper specifies that the bucket refresh should
176+
// perform a lookup in the least recently used bucket. We cannot
177+
// adhere to this because the findnode target is a 512-bit value
178+
// (not hash-sized) and it is not easily possible to generate a
179+
// sha3 preimage that falls into a chosen bucket.
180+
//
181+
// We perform a lookup with a random target instead.
182+
var target NodeID
183+
rand.Read(target[:])
184+
result := tab.Lookup(target)
180185
if len(result) == 0 {
181186
// Pick a batch of previously known seeds to look up with
182187
seeds := tab.db.querySeeds(10)
@@ -196,7 +201,7 @@ func (tab *Table) refresh() {
196201

197202
// closest returns the n nodes in the table that are closest to the
198203
// given target hash. The caller must hold tab.mutex.
199-
func (tab *Table) closest(target NodeID, nresults int) *nodesByDistance {
204+
func (tab *Table) closest(target common.Hash, nresults int) *nodesByDistance {
200205
// This is a very wasteful way to find the closest nodes but
201206
// obviously correct. I believe that tree-based buckets would make
202207
// this easier to implement efficiently.
@@ -278,7 +283,8 @@ func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16
278283
}
279284
tab.mutex.Lock()
280285
defer tab.mutex.Unlock()
281-
if b := tab.buckets[logdist(tab.self.ID, n.ID)]; !b.bump(n) {
286+
b := tab.buckets[logdist(tab.self.sha, n.sha)]
287+
if !b.bump(n) {
282288
tab.pingreplace(n, b)
283289
}
284290
return n, nil
@@ -346,7 +352,7 @@ outer:
346352
// don't add self.
347353
continue
348354
}
349-
bucket := tab.buckets[logdist(tab.self.ID, n.ID)]
355+
bucket := tab.buckets[logdist(tab.self.sha, n.sha)]
350356
for i := range bucket.entries {
351357
if bucket.entries[i].ID == n.ID {
352358
// already in bucket
@@ -375,13 +381,13 @@ func (b *bucket) bump(n *Node) bool {
375381
// distance to target.
376382
type nodesByDistance struct {
377383
entries []*Node
378-
target NodeID
384+
target common.Hash
379385
}
380386

381387
// push adds the given node to the list, keeping the total size below maxElems.
382388
func (h *nodesByDistance) push(n *Node, maxElems int) {
383389
ix := sort.Search(len(h.entries), func(i int) bool {
384-
return distcmp(h.target, h.entries[i].ID, n.ID) > 0
390+
return distcmp(h.target, h.entries[i].sha, n.sha) > 0
385391
})
386392
if len(h.entries) < maxElems {
387393
h.entries = append(h.entries, n)

0 commit comments

Comments
 (0)