Skip to content

Commit 168700c

Browse files
committed
rfq: only shut down on critical error
Fixes #1010. With this commit we introduce a new fn.CriticalError type and only send RFQ errors to the main error channel (which causes the daemon to shut down) if an error is critical to the operation and can't just be logged.
1 parent fb6c014 commit 168700c

File tree

2 files changed

+89
-15
lines changed

2 files changed

+89
-15
lines changed

fn/errors.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package fn
22

33
import (
44
"context"
5+
"errors"
56
"strings"
67

78
"google.golang.org/grpc/codes"
@@ -44,3 +45,42 @@ func IsRpcErr(err error, candidate error) bool {
4445

4546
return strings.Contains(err.Error(), candidate.Error())
4647
}
48+
49+
// CriticalError is an error type that should be used for errors that are
// critical and should cause the application to exit.
type CriticalError struct {
	// Err is the underlying error that is deemed critical.
	Err error
}

// NewCriticalError creates a new CriticalError instance that wraps the given
// error.
func NewCriticalError(err error) *CriticalError {
	return &CriticalError{Err: err}
}

// Error implements the error interface by returning the message of the wrapped
// error. A nil receiver or a nil wrapped error yields a fixed placeholder
// message instead of panicking on the nil dereference.
func (e *CriticalError) Error() string {
	if e == nil || e.Err == nil {
		return "critical error: <nil>"
	}

	return e.Err.Error()
}

// Unwrap returns the wrapped error so that errors.Is and errors.As can inspect
// the rest of the error chain. It returns nil for a nil receiver.
func (e *CriticalError) Unwrap() error {
	if e == nil {
		return nil
	}

	return e.Err
}
69+
70+
// ErrorAs behaves the same as `errors.As` except there's no need to declare
// the target error as a variable first.
// Instead of writing:
//
//	var targetErr *TargetErr
//	errors.As(err, &targetErr)
//
// We can write:
//
//	fn.ErrorAs[*TargetErr](err)
//
// To save us from declaring the target error variable. The function only
// reports whether an error of type Target exists in err's chain; the matched
// error value itself is discarded.
func ErrorAs[Target error](err error) bool {
	var target Target

	return errors.As(err, &target)
}

rfq/manager.go

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,18 @@ func (m *Manager) startSubsystems(ctx context.Context) error {
239239
return err
240240
}
241241

242+
// handleError logs an error and sends it to the main server error channel if
243+
// it is a critical error.
244+
func (m *Manager) handleError(err error) {
245+
log.Errorf("Error in RFQ manager: %v", err)
246+
247+
// If the error is a critical error, send it to the main server error
248+
// channel, which will cause the daemon to shut down.
249+
if fn.ErrorAs[*fn.CriticalError](err) {
250+
m.cfg.ErrChan <- err
251+
}
252+
}
253+
242254
// Start attempts to start a new RFQ manager.
243255
func (m *Manager) Start() error {
244256
var startErr error
@@ -363,8 +375,10 @@ func (m *Manager) handleIncomingMessage(incomingMsg rfqmsg.IncomingMsg) error {
363375
*msg.Request.AssetID, msg.Peer,
364376
)
365377
if err != nil {
366-
m.cfg.ErrChan <- fmt.Errorf("error adding "+
367-
"local alias: %w", err)
378+
m.handleError(
379+
fmt.Errorf("error adding local alias: "+
380+
"%w", err),
381+
)
368382
return
369383
}
370384

@@ -488,7 +502,12 @@ func (m *Manager) addScidAlias(scidAlias uint64, assetID asset.ID,
488502
ctxb := context.Background()
489503
localChans, err := m.cfg.ChannelLister.ListChannels(ctxb)
490504
if err != nil {
491-
return fmt.Errorf("error listing local channels: %w", err)
505+
// Not being able to call lnd to add the alias is a critical
506+
// error, which warrants shutting down, as something is wrong.
507+
return fn.NewCriticalError(
508+
fmt.Errorf("add alias: error listing local channels: "+
509+
"%w", err),
510+
)
492511
}
493512

494513
// Filter for channels with the given peer.
@@ -534,15 +553,26 @@ func (m *Manager) addScidAlias(scidAlias uint64, assetID asset.ID,
534553
// At this point, if the base SCID is still not found, we return an
535554
// error. We can't map the SCID alias to a base SCID.
536555
if baseSCID == 0 {
537-
return fmt.Errorf("base SCID not found for asset: %v", assetID)
556+
return fmt.Errorf("add alias: base SCID not found for asset: "+
557+
"%v", assetID)
538558
}
539559

540560
log.Debugf("Adding SCID alias %d for base SCID %d", scidAlias, baseSCID)
541561

542-
return m.cfg.AliasManager.AddLocalAlias(
562+
err = m.cfg.AliasManager.AddLocalAlias(
543563
ctxb, lnwire.NewShortChanIDFromInt(scidAlias),
544564
lnwire.NewShortChanIDFromInt(baseSCID),
545565
)
566+
if err != nil {
567+
// Not being able to call lnd to add the alias is a critical
568+
// error, which warrants shutting down, as something is wrong.
569+
return fn.NewCriticalError(
570+
fmt.Errorf("add alias: error adding SCID alias to "+
571+
"lnd alias manager: %w", err),
572+
)
573+
}
574+
575+
return nil
546576
}
547577

548578
// mainEventLoop is the main event loop of the RFQ manager.
@@ -556,8 +586,10 @@ func (m *Manager) mainEventLoop() {
556586

557587
err := m.handleIncomingMessage(incomingMsg)
558588
if err != nil {
559-
m.cfg.ErrChan <- fmt.Errorf("failed to "+
560-
"handle incoming message: %w", err)
589+
m.handleError(
590+
fmt.Errorf("failed to handle "+
591+
"incoming message: %w", err),
592+
)
561593
}
562594

563595
// Handle outgoing message.
@@ -567,8 +599,10 @@ func (m *Manager) mainEventLoop() {
567599

568600
err := m.handleOutgoingMessage(outgoingMsg)
569601
if err != nil {
570-
m.cfg.ErrChan <- fmt.Errorf("failed to "+
571-
"handle outgoing message: %w", err)
602+
m.handleError(
603+
fmt.Errorf("failed to handle outgoing "+
604+
"message: %w", err),
605+
)
572606
}
573607

574608
case acceptHtlcEvent := <-m.acceptHtlcEvents:
@@ -577,12 +611,12 @@ func (m *Manager) mainEventLoop() {
577611

578612
// Handle subsystem errors.
579613
case err := <-m.subsystemErrChan:
580-
log.Errorf("Manager main event loop received "+
581-
"subsystem error: %v", err)
582-
583-
// Report the subsystem error to the main server.
584-
m.cfg.ErrChan <- fmt.Errorf("encountered RFQ "+
585-
"subsystem error: %w", err)
614+
// Report the subsystem error to the main server, in
615+
// case the root cause is a critical error.
616+
m.handleError(
617+
fmt.Errorf("encountered RFQ subsystem error "+
618+
"in main event loop: %w", err),
619+
)
586620

587621
case <-m.Quit:
588622
log.Debug("Manager main event loop has received the " +

0 commit comments

Comments
 (0)