Skip to content

Commit 188aa9a

Browse files
committed
routing+lnd: prepare closed channel SCIDs in server
The method `FetchClosedChannels` sometimes prematurely mark a pending force closing channel as finalized, therefore we need to furthur check `FetchPendingChannels` to make sure the channel is indeed finalized.
1 parent e8f292e commit 188aa9a

File tree

3 files changed

+83
-53
lines changed

3 files changed

+83
-53
lines changed

routing/router.go

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -286,12 +286,10 @@ type Config struct {
286286
// graph that we received from a payment failure.
287287
ApplyChannelUpdate func(msg *lnwire.ChannelUpdate) bool
288288

289-
// FetchClosedChannels is used by the router to fetch closed channels.
289+
// ClosedSCIDs is used by the router to fetch closed channels.
290290
//
291-
// TODO(yy): remove this method once the root cause of stuck payments
292-
// is found.
293-
FetchClosedChannels func(pendingOnly bool) (
294-
[]*channeldb.ChannelCloseSummary, error)
291+
// TODO(yy): remove it once the root cause of stuck payments is found.
292+
ClosedSCIDs map[lnwire.ShortChannelID]struct{}
295293
}
296294

297295
// EdgeLocator is a struct used to identify a specific edge.
@@ -1391,19 +1389,6 @@ func (r *ChannelRouter) BuildRoute(amt fn.Option[lnwire.MilliSatoshi],
13911389
// resumePayments fetches inflight payments and resumes their payment
13921390
// lifecycles.
13931391
func (r *ChannelRouter) resumePayments() error {
1394-
// Get a list of closed channels.
1395-
channels, err := r.cfg.FetchClosedChannels(false)
1396-
if err != nil {
1397-
return err
1398-
}
1399-
1400-
closedSCIDs := make(map[lnwire.ShortChannelID]struct{}, len(channels))
1401-
for _, c := range channels {
1402-
if !c.IsPending {
1403-
closedSCIDs[c.ShortChanID] = struct{}{}
1404-
}
1405-
}
1406-
14071392
// Get all payments that are inflight.
14081393
payments, err := r.cfg.Control.FetchInFlightPayments()
14091394
if err != nil {
@@ -1422,9 +1407,7 @@ func (r *ChannelRouter) resumePayments() error {
14221407

14231408
// Try to fail the attempt if the route contains a dead
14241409
// channel.
1425-
r.failStaleAttempt(
1426-
a, p.Info.PaymentIdentifier, closedSCIDs,
1427-
)
1410+
r.failStaleAttempt(a, p.Info.PaymentIdentifier)
14281411
}
14291412
}
14301413

@@ -1510,7 +1493,7 @@ func (r *ChannelRouter) resumePayments() error {
15101493
// - https://github.com/lightningnetwork/lnd/issues/8146
15111494
// - https://github.com/lightningnetwork/lnd/pull/8174
15121495
func (r *ChannelRouter) failStaleAttempt(a channeldb.HTLCAttempt,
1513-
payHash lntypes.Hash, closedSCIDs map[lnwire.ShortChannelID]struct{}) {
1496+
payHash lntypes.Hash) {
15141497

15151498
// We can only fail inflight HTLCs so we skip the settled/failed ones.
15161499
if a.Failure != nil || a.Settle != nil {
@@ -1571,12 +1554,12 @@ func (r *ChannelRouter) failStaleAttempt(a channeldb.HTLCAttempt,
15711554
}
15721555

15731556
// The channel link is not active, we now check whether this
1574-
// channel is already closed. If so, we fail it as there's no
1575-
// need to wait for the network result because it won't be
1576-
// re-sent. If the channel is still pending, we'll keep waiting
1577-
// for the result as we may get a contract resolution for this
1578-
// HTLC.
1579-
if _, ok := closedSCIDs[scid]; ok {
1557+
// channel is already closed. If so, we fail the HTLC attempt
1558+
// as there's no need to wait for its network result because
1559+
// there's no link. If the channel is still pending, we'll keep
1560+
// waiting for the result as we may get a contract resolution
1561+
// for this HTLC.
1562+
if _, ok := r.cfg.ClosedSCIDs[scid]; ok {
15801563
shouldFail = true
15811564
}
15821565
}

routing/router_test.go

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,7 @@ func (c *testCtx) getChannelIDFromAlias(t *testing.T, a, b string) uint64 {
9393
return channelID
9494
}
9595

96-
func mockFetchClosedChannels(_ bool) ([]*channeldb.ChannelCloseSummary, error) {
97-
return nil, nil
98-
}
96+
var mockClosedSCIDs map[lnwire.ShortChannelID]struct{}
9997

10098
func createTestCtxFromGraphInstance(t *testing.T, startingHeight uint32,
10199
graphInstance *testGraphInstance) *testCtx {
@@ -162,10 +160,10 @@ func createTestCtxFromGraphInstanceAssumeValid(t *testing.T,
162160
next := atomic.AddUint64(&uniquePaymentID, 1)
163161
return next, nil
164162
},
165-
PathFindingConfig: pathFindingConfig,
166-
Clock: clock.NewTestClock(time.Unix(1, 0)),
167-
ApplyChannelUpdate: graphBuilder.ApplyChannelUpdate,
168-
FetchClosedChannels: mockFetchClosedChannels,
163+
PathFindingConfig: pathFindingConfig,
164+
Clock: clock.NewTestClock(time.Unix(1, 0)),
165+
ApplyChannelUpdate: graphBuilder.ApplyChannelUpdate,
166+
ClosedSCIDs: mockClosedSCIDs,
169167
})
170168
require.NoError(t, router.Start(), "unable to start router")
171169

@@ -2175,7 +2173,7 @@ func TestSendToRouteSkipTempErrSuccess(t *testing.T) {
21752173
NextPaymentID: func() (uint64, error) {
21762174
return 0, nil
21772175
},
2178-
FetchClosedChannels: mockFetchClosedChannels,
2176+
ClosedSCIDs: mockClosedSCIDs,
21792177
}}
21802178

21812179
// Register mockers with the expected method calls.
@@ -2259,7 +2257,7 @@ func TestSendToRouteSkipTempErrNonMPP(t *testing.T) {
22592257
NextPaymentID: func() (uint64, error) {
22602258
return 0, nil
22612259
},
2262-
FetchClosedChannels: mockFetchClosedChannels,
2260+
ClosedSCIDs: mockClosedSCIDs,
22632261
}}
22642262

22652263
// Expect an error to be returned.
@@ -2314,7 +2312,7 @@ func TestSendToRouteSkipTempErrTempFailure(t *testing.T) {
23142312
NextPaymentID: func() (uint64, error) {
23152313
return 0, nil
23162314
},
2317-
FetchClosedChannels: mockFetchClosedChannels,
2315+
ClosedSCIDs: mockClosedSCIDs,
23182316
}}
23192317

23202318
// Create the error to be returned.
@@ -2397,7 +2395,7 @@ func TestSendToRouteSkipTempErrPermanentFailure(t *testing.T) {
23972395
NextPaymentID: func() (uint64, error) {
23982396
return 0, nil
23992397
},
2400-
FetchClosedChannels: mockFetchClosedChannels,
2398+
ClosedSCIDs: mockClosedSCIDs,
24012399
}}
24022400

24032401
// Create the error to be returned.
@@ -2484,7 +2482,7 @@ func TestSendToRouteTempFailure(t *testing.T) {
24842482
NextPaymentID: func() (uint64, error) {
24852483
return 0, nil
24862484
},
2487-
FetchClosedChannels: mockFetchClosedChannels,
2485+
ClosedSCIDs: mockClosedSCIDs,
24882486
}}
24892487

24902488
// Create the error to be returned.

server.go

Lines changed: 62 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -993,19 +993,19 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
993993
}
994994

995995
s.chanRouter, err = routing.New(routing.Config{
996-
SelfNode: selfNode.PubKeyBytes,
997-
RoutingGraph: graphsession.NewRoutingGraph(chanGraph),
998-
Chain: cc.ChainIO,
999-
Payer: s.htlcSwitch,
1000-
Control: s.controlTower,
1001-
MissionControl: s.missionControl,
1002-
SessionSource: paymentSessionSource,
1003-
GetLink: s.htlcSwitch.GetLinkByShortID,
1004-
NextPaymentID: sequencer.NextID,
1005-
PathFindingConfig: pathFindingConfig,
1006-
Clock: clock.NewDefaultClock(),
1007-
ApplyChannelUpdate: s.graphBuilder.ApplyChannelUpdate,
1008-
FetchClosedChannels: s.chanStateDB.FetchClosedChannels,
996+
SelfNode: selfNode.PubKeyBytes,
997+
RoutingGraph: graphsession.NewRoutingGraph(chanGraph),
998+
Chain: cc.ChainIO,
999+
Payer: s.htlcSwitch,
1000+
Control: s.controlTower,
1001+
MissionControl: s.missionControl,
1002+
SessionSource: paymentSessionSource,
1003+
GetLink: s.htlcSwitch.GetLinkByShortID,
1004+
NextPaymentID: sequencer.NextID,
1005+
PathFindingConfig: pathFindingConfig,
1006+
Clock: clock.NewDefaultClock(),
1007+
ApplyChannelUpdate: s.graphBuilder.ApplyChannelUpdate,
1008+
ClosedSCIDs: s.fetchClosedChannelSCIDs(),
10091009
})
10101010
if err != nil {
10111011
return nil, fmt.Errorf("can't create router: %w", err)
@@ -4830,3 +4830,52 @@ func shouldPeerBootstrap(cfg *Config) bool {
48304830
// covering the bootstrapping process.
48314831
return !cfg.NoNetBootstrap && !isDevNetwork
48324832
}
4833+
4834+
// fetchClosedChannelSCIDs returns a set of SCIDs that have their force closing
4835+
// finished.
4836+
func (s *server) fetchClosedChannelSCIDs() map[lnwire.ShortChannelID]struct{} {
4837+
// Get a list of closed channels.
4838+
channels, err := s.chanStateDB.FetchClosedChannels(false)
4839+
if err != nil {
4840+
srvrLog.Errorf("Failed to fetch closed channels: %v", err)
4841+
return nil
4842+
}
4843+
4844+
// Save the SCIDs in a map.
4845+
closedSCIDs := make(map[lnwire.ShortChannelID]struct{}, len(channels))
4846+
for _, c := range channels {
4847+
// If the channel is not pending, its FC has been finalized.
4848+
if !c.IsPending {
4849+
closedSCIDs[c.ShortChanID] = struct{}{}
4850+
}
4851+
}
4852+
4853+
// Double check whether the reported closed channel has indeed finished
4854+
// closing.
4855+
//
4856+
// NOTE: There are misalignments regarding when a channel's FC is
4857+
// marked as finalized. We double check the pending channels to make
4858+
// sure the returned SCIDs are indeed terminated.
4859+
//
4860+
// TODO(yy): fix the misalignments in `FetchClosedChannels`.
4861+
pendings, err := s.chanStateDB.FetchPendingChannels()
4862+
if err != nil {
4863+
srvrLog.Errorf("Failed to fetch pending channels: %v", err)
4864+
return nil
4865+
}
4866+
4867+
for _, c := range pendings {
4868+
if _, ok := closedSCIDs[c.ShortChannelID]; !ok {
4869+
continue
4870+
}
4871+
4872+
// If the channel is still reported as pending, remove it from
4873+
// the map.
4874+
delete(closedSCIDs, c.ShortChannelID)
4875+
4876+
srvrLog.Warnf("Channel=%v is prematurely marked as finalized",
4877+
c.ShortChannelID)
4878+
}
4879+
4880+
return closedSCIDs
4881+
}

0 commit comments

Comments
 (0)