
Commit 30443a4

rules: improve channel-restriction resilience
The channel-restriction rule was previously fragile: it failed to initialize when a restricted channel was closed while the session was inactive. This often caused unnecessary session invalidation and blocked users from managing their nodes. This change makes the rule resilient by allowing it to start even if some channels in the deny list are missing from the node's current set of open channels.

To keep performance high, this implements a negative cache that tracks unknown channel IDs, shielding LND from redundant RPC calls during request evaluation.

A negative cache without invalidation would be a security problem: someone could apply a rule with a guessed future channel ID so that the channel restriction populates the checkedIDs map with it. Once that channel was opened, we'd then allow actions on it, because the getChannelID check wouldn't know about the channel's ID.

To ensure security isn't compromised by the cache, this adds a self-healing retry mechanism. If the firewall encounters an unknown channel outpoint while it still has unmapped restricted IDs, it clears the negative cache and forces a fresh sync on the next RPC call. This ensures that any newly opened restricted channels are correctly identified and blocked, without adding latency to the common path.

Note: this approach deliberately accepts potential cache thrashing in the edge case where a user repeatedly requests an unknown channel point while a permanently missing ID exists in the deny list. The trade-off prioritizes security (fail closed) over performance in this specific invalid state.
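For illustration, here is a minimal, self-contained Go sketch of the pattern described above: a negative cache of unresolved deny-list IDs plus fail-closed invalidation when an unknown outpoint appears. The names (restrictMgr, syncChannels, check, the listChannels callback) are simplified stand-ins, not the actual lightning-terminal or lndclient APIs; the real implementation is in the diff below.

package main

import (
    "errors"
    "fmt"
    "sync"
)

// restrictMgr is a simplified stand-in for the manager described above: known
// open channels in both directions, plus a negative cache (checkedIDs) of
// deny-list IDs that could not be resolved.
type restrictMgr struct {
    mu            sync.Mutex
    chanIDToPoint map[uint64]string
    chanPointToID map[string]uint64
    checkedIDs    map[uint64]bool
}

func newRestrictMgr() *restrictMgr {
    return &restrictMgr{
        chanIDToPoint: make(map[uint64]string),
        chanPointToID: make(map[string]uint64),
        checkedIDs:    make(map[uint64]bool),
    }
}

// syncChannels refreshes the maps only if some deny-listed ID is neither known
// nor already in the negative cache; listChannels stands in for the
// ListChannels RPC.
func (m *restrictMgr) syncChannels(listChannels func() map[uint64]string,
    denyList []uint64) {

    m.mu.Lock()
    defer m.mu.Unlock()

    needsSync := false
    for _, id := range denyList {
        if _, known := m.chanIDToPoint[id]; !known && !m.checkedIDs[id] {
            needsSync = true
            break
        }
    }
    if !needsSync {
        return
    }

    for id, point := range listChannels() {
        m.chanIDToPoint[id] = point
        m.chanPointToID[point] = id
    }

    // Anything still unmapped goes into the negative cache so it doesn't
    // trigger another sync.
    for _, id := range denyList {
        if _, ok := m.chanIDToPoint[id]; !ok {
            m.checkedIDs[id] = true
        }
    }
}

// check fails closed: an unknown outpoint while unmapped deny-list IDs exist
// clears the negative cache and asks the caller to retry.
func (m *restrictMgr) check(point string, denyList []uint64) error {
    m.mu.Lock()
    defer m.mu.Unlock()

    id, ok := m.chanPointToID[point]
    if !ok {
        for _, denied := range denyList {
            if m.checkedIDs[denied] {
                m.checkedIDs = make(map[uint64]bool)
                return errors.New("unknown channel point, please retry the request")
            }
        }
        return nil
    }
    for _, denied := range denyList {
        if id == denied {
            return errors.New("illegal action on channel in channel restriction list")
        }
    }
    return nil
}

func main() {
    mgr := newRestrictMgr()
    deny := []uint64{42}

    // Channel 42 is not open yet: acting on its future outpoint fails closed
    // and clears the negative cache.
    mgr.syncChannels(func() map[uint64]string { return map[uint64]string{1: "aa:0"} }, deny)
    fmt.Println(mgr.check("bb:1", deny))

    // After the channel opens, the next sync maps it and the action is blocked.
    mgr.syncChannels(func() map[uint64]string { return map[uint64]string{1: "aa:0", 42: "bb:1"} }, deny)
    fmt.Println(mgr.check("bb:1", deny))
}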
1 parent 7e0c04f commit 30443a4

2 files changed: +245, -37 lines changed

rules/channel_restrictions.go

Lines changed: 81 additions & 30 deletions
@@ -38,14 +38,22 @@ type ChannelRestrictMgr struct {
     // chanPointToID is a map from channel point to channel ID's for our
     // known set of channels.
     chanPointToID map[string]uint64
-    mu sync.Mutex
+
+    // checkedIDs tracks channel IDs that we have already attempted to find
+    // in LND's list of open channels but were not present.
+    checkedIDs map[uint64]bool
+
+    // mu is a mutex used to protect the maps and other state in the manager
+    // from concurrent access.
+    mu sync.Mutex
 }
 
 // NewChannelRestrictMgr constructs a new instance of a ChannelRestrictMgr.
 func NewChannelRestrictMgr() *ChannelRestrictMgr {
     return &ChannelRestrictMgr{
         chanIDToPoint: make(map[uint64]string),
         chanPointToID: make(map[string]uint64),
+        checkedIDs:    make(map[uint64]bool),
     }
 }
 
@@ -72,10 +80,13 @@ func (c *ChannelRestrictMgr) NewEnforcer(ctx context.Context, cfg Config,
     chanMap := make(map[uint64]bool, len(channels.DenyList))
     for _, chanID := range channels.DenyList {
         chanMap[chanID] = true
-        err := c.maybeUpdateChannelMaps(ctx, cfg, chanID)
-        if err != nil {
-            return nil, err
-        }
+    }
+
+    // We'll attempt to update our internal channel maps for any IDs in our
+    // deny list that we don't already know about and haven't checked yet.
+    err := c.maybeUpdateChannelMaps(ctx, cfg, channels.DenyList)
+    if err != nil {
+        return nil, err
     }
 
     return &ChannelRestrictEnforcer{
@@ -118,17 +129,26 @@ func (c *ChannelRestrictMgr) EmptyValue() Values {
 }
 
 // maybeUpdateChannelMaps updates the ChannelRestrictMgrs set of known channels
-// iff the channel given by the caller is not found in the current map set.
+// iff any of the channels given by the caller are not found in the current
+// map set and have not been checked previously.
 func (c *ChannelRestrictMgr) maybeUpdateChannelMaps(ctx context.Context,
-    cfg Config, chanID uint64) error {
+    cfg Config, chanIDs []uint64) error {
 
     c.mu.Lock()
     defer c.mu.Unlock()
 
-    // If we already know of this channel, we don't need to go update our
-    // maps.
-    _, ok := c.chanIDToPoint[chanID]
-    if ok {
+    var needsSync bool
+    for _, id := range chanIDs {
+        _, known := c.chanIDToPoint[id]
+        if !known && !c.checkedIDs[id] {
+            needsSync = true
+            break
+        }
+    }
+
+    // If we already know about all these channels or have checked them,
+    // then we don't need to do anything.
+    if !needsSync {
         return nil
     }
 
@@ -139,28 +159,18 @@ func (c *ChannelRestrictMgr) maybeUpdateChannelMaps(ctx context.Context,
         return err
     }
 
-    var (
-        found bool
-        point string
-        id    uint64
-    )
-
-    // Update our set of maps and also make sure that the channel specified
-    // by the caller is valid given our set of open channels.
+    // Update our set of maps with all currently open channels.
     for _, channel := range chans {
-        point = channel.ChannelPoint
-        id = channel.ChannelID
-
-        c.chanPointToID[point] = id
-        c.chanIDToPoint[id] = point
-
-        if id == chanID {
-            found = true
-        }
+        c.chanPointToID[channel.ChannelPoint] = channel.ChannelID
+        c.chanIDToPoint[channel.ChannelID] = channel.ChannelPoint
     }
 
-    if !found {
-        return fmt.Errorf("invalid channel ID")
+    // For every ID we were looking for, if it's still not in our known
+    // maps, we mark it as checked so we don't trigger another sync for it.
+    for _, id := range chanIDs {
+        if _, ok := c.chanIDToPoint[id]; !ok {
+            c.checkedIDs[id] = true
+        }
     }
 
     return nil
@@ -174,6 +184,29 @@ func (c *ChannelRestrictMgr) getChannelID(point string) (uint64, bool) {
     return id, ok
 }
 
+// hasCheckedIDs returns true if any of the given channel IDs are present in the
+// manager's checkedIDs map.
+func (c *ChannelRestrictMgr) hasCheckedIDs(chanIDs []uint64) bool {
+    c.mu.Lock()
+    defer c.mu.Unlock()
+
+    for _, id := range chanIDs {
+        if c.checkedIDs[id] {
+            return true
+        }
+    }
+
+    return false
+}
+
+// clearCheckedIDs clears the manager's set of checked IDs.
+func (c *ChannelRestrictMgr) clearCheckedIDs() {
+    c.mu.Lock()
+    defer c.mu.Unlock()
+
+    c.checkedIDs = make(map[uint64]bool)
+}
+
 // ChannelRestrictEnforcer enforces requests and responses against a
 // ChannelRestrict rule.
 type ChannelRestrictEnforcer struct {
@@ -280,6 +313,24 @@ func (c *ChannelRestrictEnforcer) checkers() map[string]mid.RoundTripChecker {
 
             id, ok := c.mgr.getChannelID(point)
             if !ok {
+                // If we don't know the channel ID for
+                // this outpoint, it's possible that our
+                // cache is stale. If we have any
+                // channels in our deny list that we
+                // haven't been able to map to an
+                // outpoint yet, we'll clear the
+                // negative cache and return an error.
+                // This ensures that the next request
+                // will trigger a fresh sync.
+                if c.mgr.hasCheckedIDs(c.DenyList) {
+                    c.mgr.clearCheckedIDs()
+
+                    return fmt.Errorf("unknown " +
+                        "channel point, " +
+                        "please retry the " +
+                        "request")
+                }
+
                 return nil
             }
 
rules/channel_restrictions_test.go

Lines changed: 164 additions & 7 deletions
@@ -310,20 +310,177 @@ func TestChannelRestrictResilience(t *testing.T) {
         mgr = NewChannelRestrictMgr()
     )
 
+    // Set up two channel points and IDs.
+    txid1, index1, err := newTXID()
+    require.NoError(t, err)
+    chanPointStr1 := fmt.Sprintf("%s:%d", hex.EncodeToString(txid1), index1)
     chanID1, _ := firewalldb.NewPseudoUint64()
+    chanPoint1 := &lnrpc.ChannelPoint{
+        FundingTxid: &lnrpc.ChannelPoint_FundingTxidStr{
+            FundingTxidStr: hex.EncodeToString(txid1),
+        },
+        OutputIndex: index1,
+    }
 
-    // Initially, LND has no channels.
+    txid2, index2, err := newTXID()
+    require.NoError(t, err)
+    chanPointStr2 := fmt.Sprintf("%s:%d", hex.EncodeToString(txid2), index2)
+    chanID2, _ := firewalldb.NewPseudoUint64()
+    chanPoint2 := &lnrpc.ChannelPoint{
+        FundingTxid: &lnrpc.ChannelPoint_FundingTxidStr{
+            FundingTxidStr: hex.EncodeToString(txid2),
+        },
+        OutputIndex: index2,
+    }
+
+    // Request: A request that tries to fetch a channel point that is not
+    // known yet. We expect the manager to try to refresh the channel list
+    // again to find the missing channel. The negative cache is empty at
+    // this point. The call fails because chanPoint2 is not known yet.
     cfg := &mockLndClient{}
     cfg.On(
         "ListChannels", mock.Anything, mock.Anything, mock.Anything,
         mock.Anything,
-    ).Return([]lndclient.ChannelInfo{}, nil)
+    ).Return(
+        []lndclient.ChannelInfo{
+            // Initially we only have chanID1 open. Somebody was
+            // able to guess chanID2 even though it's not open yet.
+            {
+                ChannelID:    chanID1,
+                ChannelPoint: chanPointStr1,
+            },
+        }, nil)
+
+    // Each time a request comes in, a new enforcer is created.
+    enf, err := mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
+    })
+    require.NoError(t, err)
+
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint2,
+            },
+        },
+    )
+
+    // The request fails because the manager doesn't know about the mapping
+    // of chanPoint2 to chanID2. The negative cache is reset to force a
+    // reload of the mapping on the next request.
+    require.ErrorContains(t, err, "unknown channel point, please retry "+
+        "the request")
+    cfg.AssertExpectations(t)
+
+    // Request: Another request that tries to fetch a known channel point.
+    // We expect another call to ListChannels to refresh the mapping, since
+    // the negative cache was cleared after the last failed request.
+    cfg = &mockLndClient{}
+    cfg.On(
+        "ListChannels", mock.Anything, mock.Anything, mock.Anything,
+        mock.Anything,
+    ).Return(
+        []lndclient.ChannelInfo{
+            {
+                ChannelID:    chanID1,
+                ChannelPoint: chanPointStr1,
+            },
+        }, nil)
+
+    enf, err = mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
+    })
+    require.NoError(t, err)
+
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint1,
+            },
+        },
+    )
+    require.NoError(t, err)
+    cfg.AssertExpectations(t)
+
+    // Request: In case we retry the request for the unknown channel, we
+    // should error again. This time we don't expect another call to
+    // ListChannels because the negative cache was not invalidated before.
+    cfg = &mockLndClient{}
+    enf, err = mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
+    })
+    require.NoError(t, err)
+
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint2,
+            },
+        },
+    )
+
+    // The call errors, which invalidates the negative cache again.
+    require.ErrorContains(t, err, "unknown channel point, please retry "+
+        "the request")
+    cfg.AssertExpectations(t)
+
+    // We simulate the channel getting confirmed.
+    cfg = &mockLndClient{}
+    cfg.On(
+        "ListChannels", mock.Anything, mock.Anything, mock.Anything,
+        mock.Anything,
+    ).Return(
+        []lndclient.ChannelInfo{
+            {
+                ChannelID:    chanID1,
+                ChannelPoint: chanPointStr1,
+            },
+            {
+                ChannelID:    chanID2,
+                ChannelPoint: chanPointStr2,
+            },
+        }, nil)
 
-    // We create an enforcer that denies chanID1 (maybe a closed channel or
-    // generally unknown). This will be fixed in a future commit.
-    _, err := mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
-        DenyList: []uint64{chanID1},
+    // Request: Now the channel is known and in the deny list. The manager
+    // resyncs the channel list again and should now know about chanID2
+    // mapping to chanPoint2.
+    enf, err = mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
     })
-    require.ErrorContains(t, err, "invalid channel ID")
+    require.NoError(t, err)
+
+    // The request gets blocked.
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint2,
+            },
+        },
+    )
+    require.ErrorContains(t, err, "illegal action on channel in channel "+
+        "restriction list")
+    cfg.AssertExpectations(t)
+
+    // Request: Request to a channel not in the deny list. It should be
+    // allowed, without fetching the channel list again.
+    cfg = &mockLndClient{}
+    enf, err = mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
+    })
+    require.NoError(t, err)
+
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint1,
+            },
+        },
+    )
+    require.NoError(t, err)
     cfg.AssertExpectations(t)
 }
