Skip to content

Commit ec5b824

Browse files
authored
Merge pull request #8406 from ziggie1984/fix-channel-opening-issue
Fix case where Opening Channels get stuck forever.
2 parents e1259cd + 13e557d commit ec5b824

File tree

10 files changed

+218
-6
lines changed

10 files changed

+218
-6
lines changed

docs/release-notes/release-notes-0.17.4.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919

2020
# Bug Fixes
2121

22+
* [Fix the removal of failed
23+
channels](https://github.com/lightningnetwork/lnd/pull/8406). When a pending
24+
channel opening was pruned from memory no more channels were able to be
25+
created nor accepted. This PR fixes this issue and enhances the test suite
26+
for this behavior.
27+
2228
# New Features
2329
## Functional Enhancements
2430
## RPC Additions

itest/list_on_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,4 +562,8 @@ var allTestCases = []*lntest.TestCase{
562562
Name: "removetx",
563563
TestFunc: testRemoveTx,
564564
},
565+
{
566+
Name: "fail funding flow psbt",
567+
TestFunc: testPsbtChanFundingFailFlow,
568+
},
565569
}

itest/lnd_psbt_test.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/lightningnetwork/lnd/lnrpc/walletrpc"
2222
"github.com/lightningnetwork/lnd/lntest"
2323
"github.com/lightningnetwork/lnd/lntest/node"
24+
"github.com/lightningnetwork/lnd/lnwallet/chanfunding"
2425
"github.com/stretchr/testify/require"
2526
)
2627

@@ -1340,3 +1341,50 @@ func sendAllCoinsToAddrType(ht *lntest.HarnessTest,
13401341
ht.MineBlocksAndAssertNumTxes(1, 1)
13411342
ht.WaitForBlockchainSync(hn)
13421343
}
1344+
1345+
// testPsbtChanFundingFailFlow tests the failing of a funding flow by the
1346+
// remote peer and that the initiator receives the expected error and aborts
1347+
// the channel opening. The psbt funding flow is used to simulate this behavior
1348+
// because we can easily let the remote peer run into the timeout.
1349+
func testPsbtChanFundingFailFlow(ht *lntest.HarnessTest) {
1350+
const chanSize = funding.MaxBtcFundingAmount
1351+
1352+
// Decrease the timeout window for the remote peer to accelerate the
1353+
// funding fail process.
1354+
args := []string{
1355+
"--dev.reservationtimeout=1s",
1356+
"--dev.zombiesweeperinterval=1s",
1357+
}
1358+
ht.RestartNodeWithExtraArgs(ht.Bob, args)
1359+
1360+
// Before we start the test, we'll ensure both sides are connected so
1361+
// the funding flow can be properly executed.
1362+
alice := ht.Alice
1363+
bob := ht.Bob
1364+
ht.EnsureConnected(alice, bob)
1365+
1366+
// At this point, we can begin our PSBT channel funding workflow. We'll
1367+
// start by generating a pending channel ID externally that will be used
1368+
// to track this new funding type.
1369+
pendingChanID := ht.Random32Bytes()
1370+
1371+
// Now that we have the pending channel ID, Alice will open the channel
1372+
// by specifying a PSBT shim.
1373+
chanUpdates, _ := ht.OpenChannelPsbt(
1374+
alice, bob, lntest.OpenChannelParams{
1375+
Amt: chanSize,
1376+
FundingShim: &lnrpc.FundingShim{
1377+
Shim: &lnrpc.FundingShim_PsbtShim{
1378+
PsbtShim: &lnrpc.PsbtShim{
1379+
PendingChanId: pendingChanID,
1380+
},
1381+
},
1382+
},
1383+
},
1384+
)
1385+
1386+
// We received the AcceptChannel msg from our peer but we are not going
1387+
// to fund this channel but instead wait for our peer to fail the
1388+
// funding workflow with an internal error.
1389+
ht.ReceiveOpenChannelError(chanUpdates, chanfunding.ErrRemoteCanceled)
1390+
}

lncfg/config.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"os/user"
66
"path/filepath"
77
"strings"
8+
"time"
89
)
910

1011
const (
@@ -67,6 +68,14 @@ const (
6768
// peer and a block arriving during that round trip to trigger force
6869
// closure.
6970
DefaultOutgoingCltvRejectDelta = DefaultOutgoingBroadcastDelta + 3
71+
72+
// DefaultReservationTimeout is the default time we wait until we remove
73+
// an unfinished (zombiestate) open channel flow from memory.
74+
DefaultReservationTimeout = 10 * time.Minute
75+
76+
// DefaultZombieSweeperInterval is the default time interval at which
77+
// unfinished (zombiestate) open channel flows are purged from memory.
78+
DefaultZombieSweeperInterval = 1 * time.Minute
7079
)
7180

7281
// CleanAndExpandPath expands environment variables and leading ~ in the

lncfg/dev.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
package lncfg
44

5-
import "time"
5+
import (
6+
"time"
7+
)
68

79
// IsDevBuild returns a bool to indicate whether we are in a development
810
// environment.
@@ -21,3 +23,13 @@ type DevConfig struct{}
2123
func (d *DevConfig) ChannelReadyWait() time.Duration {
2224
return 0
2325
}
26+
27+
// GetReservationTimeout returns the config value for `ReservationTimeout`.
28+
func (d *DevConfig) GetReservationTimeout() time.Duration {
29+
return DefaultReservationTimeout
30+
}
31+
32+
// GetZombieSweeperInterval returns the config value for`ZombieSweeperInterval`.
33+
func (d *DevConfig) GetZombieSweeperInterval() time.Duration {
34+
return DefaultZombieSweeperInterval
35+
}

lncfg/dev_integration.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
package lncfg
44

5-
import "time"
5+
import (
6+
"time"
7+
)
68

79
// IsDevBuild returns a bool to indicate whether we are in a development
810
// environment.
@@ -18,9 +20,29 @@ func IsDevBuild() bool {
1820
//nolint:lll
1921
type DevConfig struct {
2022
ProcessChannelReadyWait time.Duration `long:"processchannelreadywait" description:"Time to sleep before processing remote node's channel_ready message."`
23+
ReservationTimeout time.Duration `long:"reservationtimeout" description:"The maximum time we keep a pending channel open flow in memory."`
24+
ZombieSweeperInterval time.Duration `long:"zombiesweeperinterval" description:"The time interval at which channel opening flows are evaluated for zombie status."`
2125
}
2226

2327
// ChannelReadyWait returns the config value `ProcessChannelReadyWait`.
2428
func (d *DevConfig) ChannelReadyWait() time.Duration {
2529
return d.ProcessChannelReadyWait
2630
}
31+
32+
// GetReservationTimeout returns the config value for `ReservationTimeout`.
33+
func (d *DevConfig) GetReservationTimeout() time.Duration {
34+
if d.ReservationTimeout == 0 {
35+
return DefaultReservationTimeout
36+
}
37+
38+
return d.ReservationTimeout
39+
}
40+
41+
// GetZombieSweeperInterval returns the config value for`ZombieSweeperInterval`.
42+
func (d *DevConfig) GetZombieSweeperInterval() time.Duration {
43+
if d.ZombieSweeperInterval == 0 {
44+
return DefaultZombieSweeperInterval
45+
}
46+
47+
return d.ZombieSweeperInterval
48+
}

lntest/harness_assertion.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,16 @@ func (h *HarnessTest) ReceiveOpenChannelUpdate(
289289
return update
290290
}
291291

292+
// ReceiveOpenChannelError waits for the expected error during the open channel
293+
// flow from the peer or times out.
294+
func (h *HarnessTest) ReceiveOpenChannelError(
295+
stream rpc.OpenChanClient, expectedErr error) {
296+
297+
_, err := h.receiveOpenChannelUpdate(stream)
298+
require.Contains(h, err.Error(), expectedErr.Error(),
299+
"error not matched")
300+
}
301+
292302
// receiveOpenChannelUpdate waits until a message or an error is received on
293303
// the stream or the timeout is reached.
294304
//

peer/brontide.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,8 +506,9 @@ func NewBrontide(cfg Config) *Brontide {
506506
activeChannels: &lnutils.SyncMap[
507507
lnwire.ChannelID, *lnwallet.LightningChannel,
508508
]{},
509-
newActiveChannel: make(chan *newChannelMsg, 1),
510-
newPendingChannel: make(chan *newChannelMsg, 1),
509+
newActiveChannel: make(chan *newChannelMsg, 1),
510+
newPendingChannel: make(chan *newChannelMsg, 1),
511+
removePendingChannel: make(chan *newChannelMsg),
511512

512513
activeMsgStreams: make(map[lnwire.ChannelID]*msgStream),
513514
activeChanCloses: make(map[lnwire.ChannelID]*chancloser.ChanCloser),

peer/brontide_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package peer
22

33
import (
44
"bytes"
5+
"fmt"
56
"testing"
67
"time"
78

@@ -16,6 +17,7 @@ import (
1617
"github.com/lightningnetwork/lnd/contractcourt"
1718
"github.com/lightningnetwork/lnd/htlcswitch"
1819
"github.com/lightningnetwork/lnd/lntest/mock"
20+
"github.com/lightningnetwork/lnd/lntest/wait"
1921
"github.com/lightningnetwork/lnd/lnwallet"
2022
"github.com/lightningnetwork/lnd/lnwallet/chancloser"
2123
"github.com/lightningnetwork/lnd/lnwire"
@@ -1451,3 +1453,88 @@ func TestStartupWriteMessageRace(t *testing.T) {
14511453
}
14521454
}
14531455
}
1456+
1457+
// TestRemovePendingChannel checks that we are able to remove a pending channel
1458+
// successfully from the peers channel map. This also makes sure the
1459+
// removePendingChannel is initialized so we don't send to a nil channel and
1460+
// get stuck.
1461+
func TestRemovePendingChannel(t *testing.T) {
1462+
t.Parallel()
1463+
1464+
// Set up parameters for createTestPeer.
1465+
notifier := &mock.ChainNotifier{
1466+
SpendChan: make(chan *chainntnfs.SpendDetail),
1467+
EpochChan: make(chan *chainntnfs.BlockEpoch),
1468+
ConfChan: make(chan *chainntnfs.TxConfirmation),
1469+
}
1470+
broadcastTxChan := make(chan *wire.MsgTx)
1471+
mockSwitch := &mockMessageSwitch{}
1472+
1473+
// createTestPeer creates a peer and a channel with that peer.
1474+
peer, _, err := createTestPeer(
1475+
t, notifier, broadcastTxChan, noUpdate, mockSwitch,
1476+
)
1477+
require.NoError(t, err, "unable to create test channel")
1478+
1479+
// Add a pending channel to the peer Alice.
1480+
errChan := make(chan error, 1)
1481+
pendingChanID := lnwire.ChannelID{1}
1482+
req := &newChannelMsg{
1483+
channelID: pendingChanID,
1484+
err: errChan,
1485+
}
1486+
1487+
select {
1488+
case peer.newPendingChannel <- req:
1489+
// Operation completed successfully
1490+
case <-time.After(timeout):
1491+
t.Fatalf("not able to remove pending channel")
1492+
}
1493+
1494+
// Make sure the channel was added as a pending channel.
1495+
// The peer was already created with one active channel therefore the
1496+
// `activeChannels` had already one channel prior to adding the new one.
1497+
// The `addedChannels` map only tracks new channels in the current life
1498+
// cycle therefore the initial channel is not part of it.
1499+
err = wait.NoError(func() error {
1500+
if peer.activeChannels.Len() == 2 &&
1501+
peer.addedChannels.Len() == 1 {
1502+
1503+
return nil
1504+
}
1505+
1506+
return fmt.Errorf("pending channel not successfully added")
1507+
}, wait.DefaultTimeout)
1508+
1509+
require.NoError(t, err)
1510+
1511+
// Now try to remove it, the errChan needs to be reopened because it was
1512+
// closed during the pending channel registration above.
1513+
errChan = make(chan error, 1)
1514+
req = &newChannelMsg{
1515+
channelID: pendingChanID,
1516+
err: errChan,
1517+
}
1518+
1519+
select {
1520+
case peer.removePendingChannel <- req:
1521+
// Operation completed successfully
1522+
case <-time.After(timeout):
1523+
t.Fatalf("not able to remove pending channel")
1524+
}
1525+
1526+
// Make sure the pending channel is successfully removed from both
1527+
// channel maps.
1528+
// The initial channel between the peer is still active at this point.
1529+
err = wait.NoError(func() error {
1530+
if peer.activeChannels.Len() == 1 &&
1531+
peer.addedChannels.Len() == 0 {
1532+
1533+
return nil
1534+
}
1535+
1536+
return fmt.Errorf("pending channel not successfully removed")
1537+
}, wait.DefaultTimeout)
1538+
1539+
require.NoError(t, err)
1540+
}

server.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,13 +1270,26 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
12701270
return ourPolicy, err
12711271
}
12721272

1273+
// For the reservationTimeout and the zombieSweeperInterval different
1274+
// values are set in case we are in a dev environment so enhance test
1275+
// capacilities.
1276+
reservationTimeout := lncfg.DefaultReservationTimeout
1277+
zombieSweeperInterval := lncfg.DefaultZombieSweeperInterval
1278+
12731279
// Get the development config for funding manager. If we are not in
12741280
// development mode, this would be nil.
12751281
var devCfg *funding.DevConfig
12761282
if lncfg.IsDevBuild() {
12771283
devCfg = &funding.DevConfig{
12781284
ProcessChannelReadyWait: cfg.Dev.ChannelReadyWait(),
12791285
}
1286+
1287+
reservationTimeout = cfg.Dev.GetReservationTimeout()
1288+
zombieSweeperInterval = cfg.Dev.GetZombieSweeperInterval()
1289+
1290+
srvrLog.Debugf("Using the dev config for the fundingMgr: %v, "+
1291+
"reservationTimeout=%v, zombieSweeperInterval=%v",
1292+
devCfg, reservationTimeout, zombieSweeperInterval)
12801293
}
12811294

12821295
//nolint:lll
@@ -1436,8 +1449,8 @@ func newServer(cfg *Config, listenAddrs []net.Addr,
14361449
// channel bandwidth.
14371450
return uint16(input.MaxHTLCNumber / 2)
14381451
},
1439-
ZombieSweeperInterval: 1 * time.Minute,
1440-
ReservationTimeout: 10 * time.Minute,
1452+
ZombieSweeperInterval: zombieSweeperInterval,
1453+
ReservationTimeout: reservationTimeout,
14411454
MinChanSize: btcutil.Amount(cfg.MinChanSize),
14421455
MaxChanSize: btcutil.Amount(cfg.MaxChanSize),
14431456
MaxPendingChannels: cfg.MaxPendingChannels,

0 commit comments

Comments
 (0)