Skip to content

Commit 4a3c4e4

Browse files
authored
Merge pull request #8497 from ziggie1984/shutdown-bugfix
routing: shutdown chanrouter correctly.
2 parents e3dd886 + 0adcb5c commit 4a3c4e4

File tree

14 files changed

+229
-82
lines changed

14 files changed

+229
-82
lines changed

chainntnfs/bitcoindnotify/bitcoind.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,12 @@ func (b *BitcoindNotifier) Stop() error {
151151

152152
close(epochClient.epochChan)
153153
}
154-
b.txNotifier.TearDown()
154+
155+
// The txNotifier is only initialized in the start method therefore we
156+
// need to make sure we don't access a nil pointer here.
157+
if b.txNotifier != nil {
158+
b.txNotifier.TearDown()
159+
}
155160

156161
// Stop the mempool notifier.
157162
b.memNotifier.TearDown()

chainntnfs/neutrinonotify/neutrino.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,12 @@ func (n *NeutrinoNotifier) Stop() error {
152152

153153
close(epochClient.epochChan)
154154
}
155-
n.txNotifier.TearDown()
155+
156+
// The txNotifier is only initialized in the start method therefore we
157+
// need to make sure we don't access a nil pointer here.
158+
if n.txNotifier != nil {
159+
n.txNotifier.TearDown()
160+
}
156161

157162
return nil
158163
}

chanfitness/chaneventstore.go

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ package chanfitness
1212

1313
import (
1414
"errors"
15+
"fmt"
1516
"sync"
17+
"sync/atomic"
1618
"time"
1719

1820
"github.com/btcsuite/btcd/wire"
@@ -48,6 +50,9 @@ var (
4850
// ChannelEventStore maintains a set of event logs for the node's channels to
4951
// provide insight into the performance and health of channels.
5052
type ChannelEventStore struct {
53+
started atomic.Bool
54+
stopped atomic.Bool
55+
5156
cfg *Config
5257

5358
// peers tracks all of our currently monitored peers and their channels.
@@ -142,7 +147,11 @@ func NewChannelEventStore(config *Config) *ChannelEventStore {
142147
// information from the store. If this function fails, it cancels its existing
143148
// subscriptions and returns an error.
144149
func (c *ChannelEventStore) Start() error {
145-
log.Info("ChannelEventStore starting")
150+
log.Info("ChannelEventStore starting...")
151+
152+
if c.started.Swap(true) {
153+
return fmt.Errorf("ChannelEventStore started more than once")
154+
}
146155

147156
// Create a subscription to channel events.
148157
channelClient, err := c.cfg.SubscribeChannelEvents()
@@ -198,21 +207,36 @@ func (c *ChannelEventStore) Start() error {
198207
cancel: cancel,
199208
})
200209

210+
log.Debug("ChannelEventStore started")
211+
201212
return nil
202213
}
203214

204215
// Stop terminates all goroutines started by the event store.
205-
func (c *ChannelEventStore) Stop() {
216+
func (c *ChannelEventStore) Stop() error {
206217
log.Info("ChannelEventStore shutting down...")
207-
defer log.Debug("ChannelEventStore shutdown complete")
218+
219+
if c.stopped.Swap(true) {
220+
return fmt.Errorf("ChannelEventStore stopped more than once")
221+
}
208222

209223
// Stop the consume goroutine.
210224
close(c.quit)
211225
c.wg.Wait()
212226

213227
// Stop the ticker after the goroutine reading from it has exited, to
214228
// avoid a race.
215-
c.cfg.FlapCountTicker.Stop()
229+
var err error
230+
if c.cfg.FlapCountTicker == nil {
231+
err = fmt.Errorf("ChannelEventStore FlapCountTicker not " +
232+
"initialized")
233+
} else {
234+
c.cfg.FlapCountTicker.Stop()
235+
}
236+
237+
log.Debugf("ChannelEventStore shutdown complete")
238+
239+
return err
216240
}
217241

218242
// addChannel checks whether we are already tracking a channel's peer, creates a

discovery/gossiper.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,12 @@ func (d *AuthenticatedGossiper) stop() {
753753
log.Debug("Authenticated Gossiper is stopping")
754754
defer log.Debug("Authenticated Gossiper stopped")
755755

756-
d.blockEpochs.Cancel()
756+
// `blockEpochs` is only initialized in the start routine so we make
757+
// sure we don't panic here in the case where the `Stop` method is
758+
// called when the `Start` method does not complete.
759+
if d.blockEpochs != nil {
760+
d.blockEpochs.Cancel()
761+
}
757762

758763
d.syncMgr.Stop()
759764

docs/release-notes/release-notes-0.18.3.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@
3535

3636
* [Fixed a bug](https://github.com/lightningnetwork/lnd/pull/8896) that caused
3737
LND to use a default fee rate for the batch channel opening flow.
38+
39+
* [Fixed](https://github.com/lightningnetwork/lnd/pull/8497) a case where LND
40+
would not shut down properly when interrupted via e.g. SIGTERM. Moreover, LND
41+
now shutsdown correctly in case one subsystem fails to startup.
3842

3943
* The fee limit for payments [was made
4044
compatible](https://github.com/lightningnetwork/lnd/pull/8941) with inbound

graph/builder.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,8 @@ func (b *Builder) Start() error {
300300
b.wg.Add(1)
301301
go b.networkHandler()
302302

303+
log.Debug("Builder started")
304+
303305
return nil
304306
}
305307

@@ -312,7 +314,6 @@ func (b *Builder) Stop() error {
312314
}
313315

314316
log.Info("Builder shutting down...")
315-
defer log.Debug("Builder shutdown complete")
316317

317318
// Our filtered chain view could've only been started if
318319
// AssumeChannelValid isn't present.
@@ -325,6 +326,8 @@ func (b *Builder) Stop() error {
325326
close(b.quit)
326327
b.wg.Wait()
327328

329+
log.Debug("Builder shutdown complete")
330+
328331
return nil
329332
}
330333

htlcswitch/interceptable_switch.go

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"crypto/sha256"
55
"fmt"
66
"sync"
7+
"sync/atomic"
78

89
"github.com/go-errors/errors"
910
"github.com/lightningnetwork/lnd/chainntnfs"
@@ -33,6 +34,9 @@ var (
3334
// Settle - routes UpdateFulfillHTLC to the originating link.
3435
// Fail - routes UpdateFailHTLC to the originating link.
3536
type InterceptableSwitch struct {
37+
started atomic.Bool
38+
stopped atomic.Bool
39+
3640
// htlcSwitch is the underline switch
3741
htlcSwitch *Switch
3842

@@ -201,6 +205,12 @@ func (s *InterceptableSwitch) SetInterceptor(
201205
}
202206

203207
func (s *InterceptableSwitch) Start() error {
208+
log.Info("InterceptableSwitch starting...")
209+
210+
if s.started.Swap(true) {
211+
return fmt.Errorf("InterceptableSwitch started more than once")
212+
}
213+
204214
blockEpochStream, err := s.notifier.RegisterBlockEpochNtfn(nil)
205215
if err != nil {
206216
return err
@@ -217,14 +227,28 @@ func (s *InterceptableSwitch) Start() error {
217227
}
218228
}()
219229

230+
log.Debug("InterceptableSwitch started")
231+
220232
return nil
221233
}
222234

223235
func (s *InterceptableSwitch) Stop() error {
236+
log.Info("InterceptableSwitch shutting down...")
237+
238+
if s.stopped.Swap(true) {
239+
return fmt.Errorf("InterceptableSwitch stopped more than once")
240+
}
241+
224242
close(s.quit)
225243
s.wg.Wait()
226244

227-
s.blockEpochStream.Cancel()
245+
// We need to check whether the start routine run and initialized the
246+
// `blockEpochStream`.
247+
if s.blockEpochStream != nil {
248+
s.blockEpochStream.Cancel()
249+
}
250+
251+
log.Debug("InterceptableSwitch shutdown complete")
228252

229253
return nil
230254
}

htlcswitch/link.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -572,14 +572,18 @@ func (l *channelLink) Stop() {
572572
}
573573

574574
// Ensure the channel for the timer is drained.
575-
if !l.updateFeeTimer.Stop() {
576-
select {
577-
case <-l.updateFeeTimer.C:
578-
default:
575+
if l.updateFeeTimer != nil {
576+
if !l.updateFeeTimer.Stop() {
577+
select {
578+
case <-l.updateFeeTimer.C:
579+
default:
580+
}
579581
}
580582
}
581583

582-
l.hodlQueue.Stop()
584+
if l.hodlQueue != nil {
585+
l.hodlQueue.Stop()
586+
}
583587

584588
close(l.quit)
585589
l.wg.Wait()

invoices/invoiceregistry.go

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ func (r *htlcReleaseEvent) Less(other queue.PriorityQueueItem) bool {
101101
// created by the daemon. The registry is a thin wrapper around a map in order
102102
// to ensure that all updates/reads are thread safe.
103103
type InvoiceRegistry struct {
104+
started atomic.Bool
105+
stopped atomic.Bool
106+
104107
sync.RWMutex
105108

106109
nextClientID uint32 // must be used atomically
@@ -213,42 +216,66 @@ func (i *InvoiceRegistry) scanInvoicesOnStart(ctx context.Context) error {
213216

214217
// Start starts the registry and all goroutines it needs to carry out its task.
215218
func (i *InvoiceRegistry) Start() error {
216-
// Start InvoiceExpiryWatcher and prepopulate it with existing active
217-
// invoices.
218-
err := i.expiryWatcher.Start(func(hash lntypes.Hash, force bool) error {
219-
return i.cancelInvoiceImpl(context.Background(), hash, force)
220-
})
219+
var err error
220+
221+
log.Info("InvoiceRegistry starting...")
222+
223+
if i.started.Swap(true) {
224+
return fmt.Errorf("InvoiceRegistry started more than once")
225+
}
226+
// Start InvoiceExpiryWatcher and prepopulate it with existing
227+
// active invoices.
228+
err = i.expiryWatcher.Start(
229+
func(hash lntypes.Hash, force bool) error {
230+
return i.cancelInvoiceImpl(
231+
context.Background(), hash, force,
232+
)
233+
})
221234
if err != nil {
222235
return err
223236
}
224237

225-
log.Info("InvoiceRegistry starting")
226-
227238
i.wg.Add(1)
228239
go i.invoiceEventLoop()
229240

230-
// Now scan all pending and removable invoices to the expiry watcher or
231-
// delete them.
241+
// Now scan all pending and removable invoices to the expiry
242+
// watcher or delete them.
232243
err = i.scanInvoicesOnStart(context.Background())
233244
if err != nil {
234245
_ = i.Stop()
235-
return err
236246
}
237247

238-
return nil
248+
log.Debug("InvoiceRegistry started")
249+
250+
return err
239251
}
240252

241253
// Stop signals the registry for a graceful shutdown.
242254
func (i *InvoiceRegistry) Stop() error {
255+
log.Info("InvoiceRegistry shutting down...")
256+
257+
if i.stopped.Swap(true) {
258+
return fmt.Errorf("InvoiceRegistry stopped more than once")
259+
}
260+
243261
log.Info("InvoiceRegistry shutting down...")
244262
defer log.Debug("InvoiceRegistry shutdown complete")
245263

246-
i.expiryWatcher.Stop()
264+
var err error
265+
if i.expiryWatcher == nil {
266+
err = fmt.Errorf("InvoiceRegistry expiryWatcher is not " +
267+
"initialized")
268+
} else {
269+
i.expiryWatcher.Stop()
270+
}
247271

248272
close(i.quit)
249273

250274
i.wg.Wait()
251-
return nil
275+
276+
log.Debug("InvoiceRegistry shutdown complete")
277+
278+
return err
252279
}
253280

254281
// invoiceEvent represents a new event that has modified on invoice on disk.

lnd.go

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -694,11 +694,33 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg,
694694
bestHeight)
695695

696696
// With all the relevant chains initialized, we can finally start the
697-
// server itself.
698-
if err := server.Start(); err != nil {
697+
// server itself. We start the server in an asynchronous goroutine so
698+
// that we are able to interrupt and shutdown the daemon gracefully in
699+
// case the startup of the subservers do not behave as expected.
700+
errChan := make(chan error)
701+
go func() {
702+
errChan <- server.Start()
703+
}()
704+
705+
defer func() {
706+
err := server.Stop()
707+
if err != nil {
708+
ltndLog.Warnf("Stopping the server including all "+
709+
"its subsystems failed with %v", err)
710+
}
711+
}()
712+
713+
select {
714+
case err := <-errChan:
715+
if err == nil {
716+
break
717+
}
718+
699719
return mkErr("unable to start server: %v", err)
720+
721+
case <-interceptor.ShutdownChannel():
722+
return nil
700723
}
701-
defer server.Stop()
702724

703725
// We transition the server state to Active, as the server is up.
704726
interceptorChain.SetServerActive()

0 commit comments

Comments
 (0)