
Commit 30443a4

rules: improve channel-restriction resilience
The channel-restriction rule was previously fragile: it failed to initialize when a restricted channel was closed while the session was inactive. This often caused unnecessary session invalidation and blocked users from managing their nodes. This change makes the rule resilient by allowing it to start even if some channels in the deny list are missing from the node's current set of open channels.

To keep performance high, this implements a negative cache that tracks unknown channel IDs, shielding LND from redundant RPC calls during request evaluation.

A negative cache without invalidation would be a security problem: someone could apply a rule with a guessed future channel ID so that the channel restriction populates the checkedIDs map with it. Once that channel was opened, we'd then allow actions on it, because the getChannelID check wouldn't know about the channel's ID.

To ensure security isn't compromised by the cache, this adds a self-healing retry mechanism. If the firewall encounters an unknown channel outpoint while it still has unmapped restricted IDs, it clears the negative cache and forces a fresh sync on the next RPC call. This ensures that any newly opened restricted channels are correctly identified and blocked, without adding latency to the common path.

Note: this approach deliberately accepts potential cache thrashing in the edge case where a user repeatedly requests an unknown channel point while a permanently missing ID exists in the deny list. The trade-off prioritizes security (fail closed) over performance in this specific invalid state.
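For illustration, here is a minimal, self-contained Go sketch of the pattern described above: a negative cache of unresolved deny-list IDs plus fail-closed invalidation when an unknown outpoint appears. The names (restrictMgr, syncChannels, check, the listChannels callback) are simplified stand-ins, not the actual lightning-terminal or lndclient APIs; the real implementation is in the diff below.

package main

import (
    "errors"
    "fmt"
    "sync"
)

// restrictMgr is a simplified stand-in for the manager described above: known
// open channels in both directions, plus a negative cache (checkedIDs) of
// deny-list IDs that could not be resolved.
type restrictMgr struct {
    mu            sync.Mutex
    chanIDToPoint map[uint64]string
    chanPointToID map[string]uint64
    checkedIDs    map[uint64]bool
}

func newRestrictMgr() *restrictMgr {
    return &restrictMgr{
        chanIDToPoint: make(map[uint64]string),
        chanPointToID: make(map[string]uint64),
        checkedIDs:    make(map[uint64]bool),
    }
}

// syncChannels refreshes the maps only if some deny-listed ID is neither known
// nor already in the negative cache; listChannels stands in for the
// ListChannels RPC.
func (m *restrictMgr) syncChannels(listChannels func() map[uint64]string,
    denyList []uint64) {

    m.mu.Lock()
    defer m.mu.Unlock()

    needsSync := false
    for _, id := range denyList {
        if _, known := m.chanIDToPoint[id]; !known && !m.checkedIDs[id] {
            needsSync = true
            break
        }
    }
    if !needsSync {
        return
    }

    for id, point := range listChannels() {
        m.chanIDToPoint[id] = point
        m.chanPointToID[point] = id
    }

    // Anything still unmapped goes into the negative cache so it doesn't
    // trigger another sync.
    for _, id := range denyList {
        if _, ok := m.chanIDToPoint[id]; !ok {
            m.checkedIDs[id] = true
        }
    }
}

// check fails closed: an unknown outpoint while unmapped deny-list IDs exist
// clears the negative cache and asks the caller to retry.
func (m *restrictMgr) check(point string, denyList []uint64) error {
    m.mu.Lock()
    defer m.mu.Unlock()

    id, ok := m.chanPointToID[point]
    if !ok {
        for _, denied := range denyList {
            if m.checkedIDs[denied] {
                m.checkedIDs = make(map[uint64]bool)
                return errors.New("unknown channel point, please retry the request")
            }
        }
        return nil
    }
    for _, denied := range denyList {
        if id == denied {
            return errors.New("illegal action on channel in channel restriction list")
        }
    }
    return nil
}

func main() {
    mgr := newRestrictMgr()
    deny := []uint64{42}

    // Channel 42 is not open yet: acting on its future outpoint fails closed
    // and clears the negative cache.
    mgr.syncChannels(func() map[uint64]string { return map[uint64]string{1: "aa:0"} }, deny)
    fmt.Println(mgr.check("bb:1", deny))

    // After the channel opens, the next sync maps it and the action is blocked.
    mgr.syncChannels(func() map[uint64]string { return map[uint64]string{1: "aa:0", 42: "bb:1"} }, deny)
    fmt.Println(mgr.check("bb:1", deny))
}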
1 parent 7e0c04f commit 30443a4

2 files changed: +245, -37 lines changed

rules/channel_restrictions.go

Lines changed: 81 additions & 30 deletions
@@ -38,14 +38,22 @@ type ChannelRestrictMgr struct {
     // chanPointToID is a map from channel point to channel ID's for our
     // known set of channels.
     chanPointToID map[string]uint64
-    mu sync.Mutex
+
+    // checkedIDs tracks channel IDs that we have already attempted to find
+    // in LND's list of open channels but were not present.
+    checkedIDs map[uint64]bool
+
+    // mu is a mutex used to protect the maps and other state in the manager
+    // from concurrent access.
+    mu sync.Mutex
 }
 
 // NewChannelRestrictMgr constructs a new instance of a ChannelRestrictMgr.
 func NewChannelRestrictMgr() *ChannelRestrictMgr {
     return &ChannelRestrictMgr{
         chanIDToPoint: make(map[uint64]string),
         chanPointToID: make(map[string]uint64),
+        checkedIDs:    make(map[uint64]bool),
     }
 }
 
@@ -72,10 +80,13 @@ func (c *ChannelRestrictMgr) NewEnforcer(ctx context.Context, cfg Config,
     chanMap := make(map[uint64]bool, len(channels.DenyList))
     for _, chanID := range channels.DenyList {
         chanMap[chanID] = true
-        err := c.maybeUpdateChannelMaps(ctx, cfg, chanID)
-        if err != nil {
-            return nil, err
-        }
+    }
+
+    // We'll attempt to update our internal channel maps for any IDs in our
+    // deny list that we don't already know about and haven't checked yet.
+    err := c.maybeUpdateChannelMaps(ctx, cfg, channels.DenyList)
+    if err != nil {
+        return nil, err
     }
 
     return &ChannelRestrictEnforcer{
@@ -118,17 +129,26 @@ func (c *ChannelRestrictMgr) EmptyValue() Values {
 }
 
 // maybeUpdateChannelMaps updates the ChannelRestrictMgrs set of known channels
-// iff the channel given by the caller is not found in the current map set.
+// iff any of the channels given by the caller are not found in the current
+// map set and have not been checked previously.
 func (c *ChannelRestrictMgr) maybeUpdateChannelMaps(ctx context.Context,
-    cfg Config, chanID uint64) error {
+    cfg Config, chanIDs []uint64) error {
 
     c.mu.Lock()
     defer c.mu.Unlock()
 
-    // If we already know of this channel, we don't need to go update our
-    // maps.
-    _, ok := c.chanIDToPoint[chanID]
-    if ok {
+    var needsSync bool
+    for _, id := range chanIDs {
+        _, known := c.chanIDToPoint[id]
+        if !known && !c.checkedIDs[id] {
+            needsSync = true
+            break
+        }
+    }
+
+    // If we already know about all these channels or have checked them,
+    // then we don't need to do anything.
+    if !needsSync {
         return nil
     }
 
@@ -139,28 +159,18 @@ func (c *ChannelRestrictMgr) maybeUpdateChannelMaps(ctx context.Context,
         return err
     }
 
-    var (
-        found bool
-        point string
-        id    uint64
-    )
-
-    // Update our set of maps and also make sure that the channel specified
-    // by the caller is valid given our set of open channels.
+    // Update our set of maps with all currently open channels.
     for _, channel := range chans {
-        point = channel.ChannelPoint
-        id = channel.ChannelID
-
-        c.chanPointToID[point] = id
-        c.chanIDToPoint[id] = point
-
-        if id == chanID {
-            found = true
-        }
+        c.chanPointToID[channel.ChannelPoint] = channel.ChannelID
+        c.chanIDToPoint[channel.ChannelID] = channel.ChannelPoint
     }
 
-    if !found {
-        return fmt.Errorf("invalid channel ID")
+    // For every ID we were looking for, if it's still not in our known
+    // maps, we mark it as checked so we don't trigger another sync for it.
+    for _, id := range chanIDs {
+        if _, ok := c.chanIDToPoint[id]; !ok {
+            c.checkedIDs[id] = true
+        }
     }
 
     return nil
@@ -174,6 +184,29 @@ func (c *ChannelRestrictMgr) getChannelID(point string) (uint64, bool) {
     return id, ok
 }
 
+// hasCheckedIDs returns true if any of the given channel IDs are present in the
+// manager's checkedIDs map.
+func (c *ChannelRestrictMgr) hasCheckedIDs(chanIDs []uint64) bool {
+    c.mu.Lock()
+    defer c.mu.Unlock()
+
+    for _, id := range chanIDs {
+        if c.checkedIDs[id] {
+            return true
+        }
+    }
+
+    return false
+}
+
+// clearCheckedIDs clears the manager's set of checked IDs.
+func (c *ChannelRestrictMgr) clearCheckedIDs() {
+    c.mu.Lock()
+    defer c.mu.Unlock()
+
+    c.checkedIDs = make(map[uint64]bool)
+}
+
 // ChannelRestrictEnforcer enforces requests and responses against a
 // ChannelRestrict rule.
 type ChannelRestrictEnforcer struct {
@@ -280,6 +313,24 @@ func (c *ChannelRestrictEnforcer) checkers() map[string]mid.RoundTripChecker {
 
             id, ok := c.mgr.getChannelID(point)
             if !ok {
+                // If we don't know the channel ID for
+                // this outpoint, it's possible that our
+                // cache is stale. If we have any
+                // channels in our deny list that we
+                // haven't been able to map to an
+                // outpoint yet, we'll clear the
+                // negative cache and return an error.
+                // This ensures that the next request
+                // will trigger a fresh sync.
+                if c.mgr.hasCheckedIDs(c.DenyList) {
+                    c.mgr.clearCheckedIDs()
+
+                    return fmt.Errorf("unknown " +
+                        "channel point, " +
+                        "please retry the " +
+                        "request")
+                }
+
                 return nil
             }
 
rules/channel_restrictions_test.go

Lines changed: 164 additions & 7 deletions
@@ -310,20 +310,177 @@ func TestChannelRestrictResilience(t *testing.T) {
         mgr = NewChannelRestrictMgr()
     )
 
+    // Set up two channel points and IDs.
+    txid1, index1, err := newTXID()
+    require.NoError(t, err)
+    chanPointStr1 := fmt.Sprintf("%s:%d", hex.EncodeToString(txid1), index1)
     chanID1, _ := firewalldb.NewPseudoUint64()
+    chanPoint1 := &lnrpc.ChannelPoint{
+        FundingTxid: &lnrpc.ChannelPoint_FundingTxidStr{
+            FundingTxidStr: hex.EncodeToString(txid1),
+        },
+        OutputIndex: index1,
+    }
 
-    // Initially, LND has no channels.
+    txid2, index2, err := newTXID()
+    require.NoError(t, err)
+    chanPointStr2 := fmt.Sprintf("%s:%d", hex.EncodeToString(txid2), index2)
+    chanID2, _ := firewalldb.NewPseudoUint64()
+    chanPoint2 := &lnrpc.ChannelPoint{
+        FundingTxid: &lnrpc.ChannelPoint_FundingTxidStr{
+            FundingTxidStr: hex.EncodeToString(txid2),
+        },
+        OutputIndex: index2,
+    }
+
+    // Request: A request that tries to fetch a channel point that is not
+    // known yet. We expect the manager to try to refresh the channel list
+    // again to find the missing channel. The negative cache is empty at
+    // this point. The call fails because chanPoint2 is not known yet.
     cfg := &mockLndClient{}
     cfg.On(
         "ListChannels", mock.Anything, mock.Anything, mock.Anything,
         mock.Anything,
-    ).Return([]lndclient.ChannelInfo{}, nil)
+    ).Return(
+        []lndclient.ChannelInfo{
+            // Initially we only have chanID1 open. Somebody was
+            // able to guess chanID2 even though it's not open yet.
+            {
+                ChannelID:    chanID1,
+                ChannelPoint: chanPointStr1,
+            },
+        }, nil)
+
+    // Each time a request comes in, a new enforcer is created.
+    enf, err := mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
+    })
+    require.NoError(t, err)
+
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint2,
+            },
+        },
+    )
+
+    // The request fails because the manager doesn't know about the mapping
+    // of chanPoint2 to chanID2. The negative cache is reset to force a
+    // reload of the mapping on the next request.
+    require.ErrorContains(t, err, "unknown channel point, please retry "+
+        "the request")
+    cfg.AssertExpectations(t)
+
+    // Request: Another request that tries to fetch a known channel point.
+    // We expect another call to ListChannels to refresh the mapping, since
+    // the negative cache was cleared after the last failed request.
+    cfg = &mockLndClient{}
+    cfg.On(
+        "ListChannels", mock.Anything, mock.Anything, mock.Anything,
+        mock.Anything,
+    ).Return(
+        []lndclient.ChannelInfo{
+            {
+                ChannelID:    chanID1,
+                ChannelPoint: chanPointStr1,
+            },
+        }, nil)
+
+    enf, err = mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
+    })
+    require.NoError(t, err)
+
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint1,
+            },
+        },
+    )
+    require.NoError(t, err)
+    cfg.AssertExpectations(t)
+
+    // Request: In case we retry the request for the unknown channel, we
+    // should error again. This time we don't expect another call to
+    // ListChannels because the negative cache was not invalidated before.
+    cfg = &mockLndClient{}
+    enf, err = mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
+    })
+    require.NoError(t, err)
+
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint2,
+            },
+        },
+    )
+
+    // The call errors, which invalidates the negative cache again.
+    require.ErrorContains(t, err, "unknown channel point, please retry "+
+        "the request")
+    cfg.AssertExpectations(t)
+
+    // We simulate the channel getting confirmed.
+    cfg = &mockLndClient{}
+    cfg.On(
+        "ListChannels", mock.Anything, mock.Anything, mock.Anything,
+        mock.Anything,
+    ).Return(
+        []lndclient.ChannelInfo{
+            {
+                ChannelID:    chanID1,
+                ChannelPoint: chanPointStr1,
+            },
+            {
+                ChannelID:    chanID2,
+                ChannelPoint: chanPointStr2,
+            },
+        }, nil)
 
-    // We create an enforcer that denies chanID1 (maybe a closed channel or
-    // generally unknown). This will be fixed in a future commit.
-    _, err := mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
-        DenyList: []uint64{chanID1},
+    // Request: Now the channel is known and in the deny list. The manager
+    // resyncs the channel list again and should now know about chanID2
+    // mapping to chanPoint2.
+    enf, err = mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
     })
-    require.ErrorContains(t, err, "invalid channel ID")
+    require.NoError(t, err)
+
+    // The request gets blocked.
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint2,
+            },
+        },
+    )
+    require.ErrorContains(t, err, "illegal action on channel in channel "+
+        "restriction list")
+    cfg.AssertExpectations(t)
+
+    // Request: Request to a channel not in the deny list. It should be
+    // allowed, without fetching the channel list again.
+    cfg = &mockLndClient{}
+    enf, err = mgr.NewEnforcer(ctx, cfg, &ChannelRestrict{
+        DenyList: []uint64{chanID2},
+    })
+    require.NoError(t, err)
+
+    _, err = enf.HandleRequest(
+        ctx, "/lnrpc.Lightning/UpdateChannelPolicy",
+        &lnrpc.PolicyUpdateRequest{
+            Scope: &lnrpc.PolicyUpdateRequest_ChanPoint{
+                ChanPoint: chanPoint1,
+            },
+        },
+    )
+    require.NoError(t, err)
     cfg.AssertExpectations(t)
 }
