Skip to content

Commit 6850259

Browse files
rjl493456442 authored and karalabe committed
les: wait for all task goroutines before dropping the peer (#20010)
* les: wait all task routines before drop the peer
* les: address comments
* les: fix issue
1 parent a978adf commit 6850259

File tree

7 files changed

+84
-53
lines changed

7 files changed

+84
-53
lines changed

les/benchmark.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"math/big"
2323
"math/rand"
24+
"sync"
2425
"time"
2526

2627
"github.com/ethereum/go-ethereum/common"
@@ -312,7 +313,7 @@ func (h *serverHandler) measure(setup *benchmarkSetup, count int) error {
312313
}()
313314
go func() {
314315
for i := 0; i < count; i++ {
315-
if err := h.handleMsg(serverPeer); err != nil {
316+
if err := h.handleMsg(serverPeer, &sync.WaitGroup{}); err != nil {
316317
errCh <- err
317318
return
318319
}

les/clientpool.go

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -181,52 +181,53 @@ func (f *clientPool) stop() {
181181
f.lock.Unlock()
182182
}
183183

184-
// registerPeer implements peerSetNotify
185-
func (f *clientPool) registerPeer(p *peer) {
186-
c := f.connect(p, 0)
187-
if c != nil {
188-
p.balanceTracker = &c.balanceTracker
189-
}
190-
}
191-
192184
// connect should be called after a successful handshake. If the connection was
193185
// rejected, there is no need to call disconnect.
194-
func (f *clientPool) connect(peer clientPeer, capacity uint64) *clientInfo {
186+
func (f *clientPool) connect(peer clientPeer, capacity uint64) bool {
195187
f.lock.Lock()
196188
defer f.lock.Unlock()
197189

190+
// Short circuit is clientPool is already closed.
198191
if f.closed {
199-
return nil
192+
return false
200193
}
201-
address := peer.freeClientId()
202-
id := peer.ID()
203-
idStr := peerIdToString(id)
194+
// Dedup connected peers.
195+
id, freeID := peer.ID(), peer.freeClientId()
204196
if _, ok := f.connectedMap[id]; ok {
205197
clientRejectedMeter.Mark(1)
206-
log.Debug("Client already connected", "address", address, "id", idStr)
207-
return nil
198+
log.Debug("Client already connected", "address", freeID, "id", peerIdToString(id))
199+
return false
208200
}
201+
// Create a clientInfo but do not add it yet
209202
now := f.clock.Now()
210-
// create a clientInfo but do not add it yet
211-
e := &clientInfo{pool: f, peer: peer, address: address, queueIndex: -1, id: id}
212203
posBalance := f.getPosBalance(id).value
213-
e.priority = posBalance != 0
204+
e := &clientInfo{pool: f, peer: peer, address: freeID, queueIndex: -1, id: id, priority: posBalance != 0}
205+
214206
var negBalance uint64
215-
nb := f.negBalanceMap[address]
207+
nb := f.negBalanceMap[freeID]
216208
if nb != nil {
217209
negBalance = uint64(math.Exp(float64(nb.logValue-f.logOffset(now)) / fixedPointMultiplier))
218210
}
211+
// If the client is a free client, assign with a low free capacity,
212+
// Otherwise assign with the given value(priority client)
219213
if !e.priority {
220214
capacity = f.freeClientCap
221215
}
222-
// check whether it fits into connectedQueue
216+
// Ensure the capacity will never lower than the free capacity.
223217
if capacity < f.freeClientCap {
224218
capacity = f.freeClientCap
225219
}
226220
e.capacity = capacity
221+
227222
e.balanceTracker.init(f.clock, capacity)
228223
e.balanceTracker.setBalance(posBalance, negBalance)
229224
f.setClientPriceFactors(e)
225+
226+
// If the number of clients already connected in the clientpool exceeds its
227+
// capacity, evict some clients with lowest priority.
228+
//
229+
// If the priority of the newly added client is lower than the priority of
230+
// all connected clients, the client is rejected.
230231
newCapacity := f.connectedCapacity + capacity
231232
newCount := f.connectedQueue.Size() + 1
232233
if newCapacity > f.capacityLimit || newCount > f.countLimit {
@@ -248,8 +249,8 @@ func (f *clientPool) connect(peer clientPeer, capacity uint64) *clientInfo {
248249
f.connectedQueue.Push(c)
249250
}
250251
clientRejectedMeter.Mark(1)
251-
log.Debug("Client rejected", "address", address, "id", idStr)
252-
return nil
252+
log.Debug("Client rejected", "address", freeID, "id", peerIdToString(id))
253+
return false
253254
}
254255
// accept new client, drop old ones
255256
for _, c := range kickList {
@@ -258,7 +259,7 @@ func (f *clientPool) connect(peer clientPeer, capacity uint64) *clientInfo {
258259
}
259260
// client accepted, finish setting it up
260261
if nb != nil {
261-
delete(f.negBalanceMap, address)
262+
delete(f.negBalanceMap, freeID)
262263
f.negBalanceQueue.Remove(nb.queueIndex)
263264
}
264265
if e.priority {
@@ -272,13 +273,8 @@ func (f *clientPool) connect(peer clientPeer, capacity uint64) *clientInfo {
272273
e.peer.updateCapacity(e.capacity)
273274
}
274275
clientConnectedMeter.Mark(1)
275-
log.Debug("Client accepted", "address", address)
276-
return e
277-
}
278-
279-
// unregisterPeer implements peerSetNotify
280-
func (f *clientPool) unregisterPeer(p *peer) {
281-
f.disconnect(p)
276+
log.Debug("Client accepted", "address", freeID)
277+
return true
282278
}
283279

284280
// disconnect should be called when a connection is terminated. If the disconnection
@@ -378,6 +374,18 @@ func (f *clientPool) setLimits(count int, totalCap uint64) {
378374
})
379375
}
380376

377+
// requestCost feeds request cost after serving a request from the given peer.
378+
func (f *clientPool) requestCost(p *peer, cost uint64) {
379+
f.lock.Lock()
380+
defer f.lock.Unlock()
381+
382+
info, exist := f.connectedMap[p.ID()]
383+
if !exist || f.closed {
384+
return
385+
}
386+
info.balanceTracker.requestCost(cost)
387+
}
388+
381389
// logOffset calculates the time-dependent offset for the logarithmic
382390
// representation of negative balance
383391
func (f *clientPool) logOffset(now mclock.AbsTime) int64 {

les/clientpool_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,14 @@ func testClientPool(t *testing.T, connLimit, clientCount, paidCount int, randomD
8383

8484
// pool should accept new peers up to its connected limit
8585
for i := 0; i < connLimit; i++ {
86-
if pool.connect(poolTestPeer(i), 0) != nil {
86+
if pool.connect(poolTestPeer(i), 0) {
8787
connected[i] = true
8888
} else {
8989
t.Fatalf("Test peer #%d rejected", i)
9090
}
9191
}
9292
// since all accepted peers are new and should not be kicked out, the next one should be rejected
93-
if pool.connect(poolTestPeer(connLimit), 0) != nil {
93+
if pool.connect(poolTestPeer(connLimit), 0) {
9494
connected[connLimit] = true
9595
t.Fatalf("Peer accepted over connected limit")
9696
}
@@ -116,7 +116,7 @@ func testClientPool(t *testing.T, connLimit, clientCount, paidCount int, randomD
116116
connTicks[i] += tickCounter
117117
}
118118
} else {
119-
if pool.connect(poolTestPeer(i), 0) != nil {
119+
if pool.connect(poolTestPeer(i), 0) {
120120
connected[i] = true
121121
connTicks[i] -= tickCounter
122122
}
@@ -159,7 +159,7 @@ func testClientPool(t *testing.T, connLimit, clientCount, paidCount int, randomD
159159
}
160160

161161
// a previously unknown peer should be accepted now
162-
if pool.connect(poolTestPeer(54321), 0) == nil {
162+
if !pool.connect(poolTestPeer(54321), 0) {
163163
t.Fatalf("Previously unknown peer rejected")
164164
}
165165

@@ -173,7 +173,7 @@ func testClientPool(t *testing.T, connLimit, clientCount, paidCount int, randomD
173173
pool.connect(poolTestPeer(i), 0)
174174
}
175175
// expect pool to remember known nodes and kick out one of them to accept a new one
176-
if pool.connect(poolTestPeer(54322), 0) == nil {
176+
if !pool.connect(poolTestPeer(54322), 0) {
177177
t.Errorf("Previously unknown peer rejected after restarting pool")
178178
}
179179
pool.stop()

les/peer.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ type peer struct {
9494
sendQueue *execQueue
9595

9696
errCh chan error
97+
9798
// responseLock ensures that responses are queued in the same order as
9899
// RequestProcessed is called
99100
responseLock sync.Mutex
@@ -107,11 +108,10 @@ type peer struct {
107108
updateTime mclock.AbsTime
108109
frozen uint32 // 1 if client is in frozen state
109110

110-
fcClient *flowcontrol.ClientNode // nil if the peer is server only
111-
fcServer *flowcontrol.ServerNode // nil if the peer is client only
112-
fcParams flowcontrol.ServerParams
113-
fcCosts requestCostTable
114-
balanceTracker *balanceTracker // set by clientPool.connect, used and removed by serverHandler.
111+
fcClient *flowcontrol.ClientNode // nil if the peer is server only
112+
fcServer *flowcontrol.ServerNode // nil if the peer is client only
113+
fcParams flowcontrol.ServerParams
114+
fcCosts requestCostTable
115115

116116
trusted bool
117117
onlyAnnounce bool

les/server.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,7 @@ func NewLesServer(e *eth.Ethereum, config *eth.Config) (*LesServer, error) {
112112
maxCapacity = totalRecharge
113113
}
114114
srv.fcManager.SetCapacityLimits(srv.freeCapacity, maxCapacity, srv.freeCapacity*2)
115-
116115
srv.clientPool = newClientPool(srv.chainDb, srv.freeCapacity, 10000, mclock.System{}, func(id enode.ID) { go srv.peers.Unregister(peerIdToString(id)) })
117-
srv.peers.notify(srv.clientPool)
118116

119117
checkpoint := srv.latestLocalCheckpoint()
120118
if !checkpoint.Empty() {

les/server_handler.go

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,10 @@ const (
5454
MaxTxStatus = 256 // Amount of transactions to queried per request
5555
)
5656

57-
var errTooManyInvalidRequest = errors.New("too many invalid requests made")
57+
var (
58+
errTooManyInvalidRequest = errors.New("too many invalid requests made")
59+
errFullClientPool = errors.New("client pool is full")
60+
)
5861

5962
// serverHandler is responsible for serving light client and process
6063
// all incoming light requests.
@@ -124,23 +127,26 @@ func (h *serverHandler) handle(p *peer) error {
124127
}
125128
defer p.fcClient.Disconnect()
126129

130+
// Disconnect the inbound peer if it's rejected by clientPool
131+
if !h.server.clientPool.connect(p, 0) {
132+
p.Log().Debug("Light Ethereum peer registration failed", "err", errFullClientPool)
133+
return errFullClientPool
134+
}
127135
// Register the peer locally
128136
if err := h.server.peers.Register(p); err != nil {
137+
h.server.clientPool.disconnect(p)
129138
p.Log().Error("Light Ethereum peer registration failed", "err", err)
130139
return err
131140
}
132141
clientConnectionGauge.Update(int64(h.server.peers.Len()))
133142

134-
// add dummy balance tracker for tests
135-
if p.balanceTracker == nil {
136-
p.balanceTracker = &balanceTracker{}
137-
p.balanceTracker.init(&mclock.System{}, 1)
138-
}
143+
var wg sync.WaitGroup // Wait group used to track all in-flight task routines.
139144

140145
connectedAt := mclock.Now()
141146
defer func() {
142-
p.balanceTracker = nil
147+
wg.Wait() // Ensure all background task routines have exited.
143148
h.server.peers.Unregister(p.id)
149+
h.server.clientPool.disconnect(p)
144150
clientConnectionGauge.Update(int64(h.server.peers.Len()))
145151
connectionTimer.Update(time.Duration(mclock.Now() - connectedAt))
146152
}()
@@ -153,7 +159,7 @@ func (h *serverHandler) handle(p *peer) error {
153159
return err
154160
default:
155161
}
156-
if err := h.handleMsg(p); err != nil {
162+
if err := h.handleMsg(p, &wg); err != nil {
157163
p.Log().Debug("Light Ethereum message handling failed", "err", err)
158164
return err
159165
}
@@ -162,7 +168,7 @@ func (h *serverHandler) handle(p *peer) error {
162168

163169
// handleMsg is invoked whenever an inbound message is received from a remote
164170
// peer. The remote connection is torn down upon returning any error.
165-
func (h *serverHandler) handleMsg(p *peer) error {
171+
func (h *serverHandler) handleMsg(p *peer, wg *sync.WaitGroup) error {
166172
// Read the next message from the remote peer, and ensure it's fully consumed
167173
msg, err := p.rw.ReadMsg()
168174
if err != nil {
@@ -243,7 +249,7 @@ func (h *serverHandler) handleMsg(p *peer) error {
243249
// Feed cost tracker request serving statistic.
244250
h.server.costTracker.updateStats(msg.Code, amount, servingTime, realCost)
245251
// Reduce priority "balance" for the specific peer.
246-
p.balanceTracker.requestCost(realCost)
252+
h.server.clientPool.requestCost(p, realCost)
247253
}
248254
if reply != nil {
249255
p.queueSend(func() {
@@ -273,7 +279,9 @@ func (h *serverHandler) handleMsg(p *peer) error {
273279
}
274280
query := req.Query
275281
if accept(req.ReqID, query.Amount, MaxHeaderFetch) {
282+
wg.Add(1)
276283
go func() {
284+
defer wg.Done()
277285
hashMode := query.Origin.Hash != (common.Hash{})
278286
first := true
279287
maxNonCanonical := uint64(100)
@@ -387,7 +395,9 @@ func (h *serverHandler) handleMsg(p *peer) error {
387395
)
388396
reqCnt := len(req.Hashes)
389397
if accept(req.ReqID, uint64(reqCnt), MaxBodyFetch) {
398+
wg.Add(1)
390399
go func() {
400+
defer wg.Done()
391401
for i, hash := range req.Hashes {
392402
if i != 0 && !task.waitOrStop() {
393403
sendResponse(req.ReqID, 0, nil, task.servingTime)
@@ -433,7 +443,9 @@ func (h *serverHandler) handleMsg(p *peer) error {
433443
)
434444
reqCnt := len(req.Reqs)
435445
if accept(req.ReqID, uint64(reqCnt), MaxCodeFetch) {
446+
wg.Add(1)
436447
go func() {
448+
defer wg.Done()
437449
for i, request := range req.Reqs {
438450
if i != 0 && !task.waitOrStop() {
439451
sendResponse(req.ReqID, 0, nil, task.servingTime)
@@ -502,7 +514,9 @@ func (h *serverHandler) handleMsg(p *peer) error {
502514
)
503515
reqCnt := len(req.Hashes)
504516
if accept(req.ReqID, uint64(reqCnt), MaxReceiptFetch) {
517+
wg.Add(1)
505518
go func() {
519+
defer wg.Done()
506520
for i, hash := range req.Hashes {
507521
if i != 0 && !task.waitOrStop() {
508522
sendResponse(req.ReqID, 0, nil, task.servingTime)
@@ -557,7 +571,9 @@ func (h *serverHandler) handleMsg(p *peer) error {
557571
)
558572
reqCnt := len(req.Reqs)
559573
if accept(req.ReqID, uint64(reqCnt), MaxProofsFetch) {
574+
wg.Add(1)
560575
go func() {
576+
defer wg.Done()
561577
nodes := light.NewNodeSet()
562578

563579
for i, request := range req.Reqs {
@@ -658,7 +674,9 @@ func (h *serverHandler) handleMsg(p *peer) error {
658674
)
659675
reqCnt := len(req.Reqs)
660676
if accept(req.ReqID, uint64(reqCnt), MaxHelperTrieProofsFetch) {
677+
wg.Add(1)
661678
go func() {
679+
defer wg.Done()
662680
var (
663681
lastIdx uint64
664682
lastType uint
@@ -725,7 +743,9 @@ func (h *serverHandler) handleMsg(p *peer) error {
725743
}
726744
reqCnt := len(req.Txs)
727745
if accept(req.ReqID, uint64(reqCnt), MaxTxSend) {
746+
wg.Add(1)
728747
go func() {
748+
defer wg.Done()
729749
stats := make([]light.TxStatus, len(req.Txs))
730750
for i, tx := range req.Txs {
731751
if i != 0 && !task.waitOrStop() {
@@ -771,7 +791,9 @@ func (h *serverHandler) handleMsg(p *peer) error {
771791
}
772792
reqCnt := len(req.Hashes)
773793
if accept(req.ReqID, uint64(reqCnt), MaxTxStatus) {
794+
wg.Add(1)
774795
go func() {
796+
defer wg.Done()
775797
stats := make([]light.TxStatus, len(req.Hashes))
776798
for i, hash := range req.Hashes {
777799
if i != 0 && !task.waitOrStop() {

les/test_helper.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,8 @@ func newTestServerHandler(blocks int, indexers []*core.ChainIndexer, db ethdb.Da
280280
}
281281
server.costTracker, server.freeCapacity = newCostTracker(db, server.config)
282282
server.costTracker.testCostList = testCostList(0) // Disable flow control mechanism.
283+
server.clientPool = newClientPool(db, 1, 10000, clock, nil)
284+
server.clientPool.setLimits(10000, 10000) // Assign enough capacity for clientpool
283285
server.handler = newServerHandler(server, simulation.Blockchain(), db, txpool, func() bool { return true })
284286
if server.oracle != nil {
285287
server.oracle.start(simulation)

0 commit comments

Comments (0)