74 changes: 74 additions & 0 deletions lightning/src/chain/channelmonitor.rs
@@ -1023,6 +1023,12 @@ pub(crate) struct ChannelMonitorImpl<Signer: EcdsaChannelSigner> {

/// The first block height at which we had no remaining claimable balances.
balances_empty_height: Option<u32>,

/// In-memory only HTLC ids used to track upstream HTLCs that have been failed backwards due to
/// a downstream channel force-close remaining unconfirmed by the time the upstream timeout
/// expires. This is used to tell us we already generated an event to fail this HTLC back
/// during a previous block scan.
failed_back_htlc_ids: HashSet<SentHTLCId>,
}

/// Transaction outputs to watch for on-chain spends.
@@ -1445,6 +1451,8 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
counterparty_node_id: Some(counterparty_node_id),
initial_counterparty_commitment_info: None,
balances_empty_height: None,

failed_back_htlc_ids: new_hash_set(),
})
}

@@ -4221,6 +4229,71 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
}
}

if self.lockdown_from_offchain || self.funding_spend_seen || self.holder_tx_signed {
// Fail back HTLCs on backwards channels if they expire within
// `LATENCY_GRACE_PERIOD_BLOCKS` blocks and the channel is closed (i.e. we're at a
// point where no further off-chain updates will be accepted). If we haven't seen the
// preimage for an HTLC by the time the previous hop's timeout expires, we've lost that
// HTLC, so we might as well fail it back instead of having our counterparty force-close
// the inbound channel.
let current_holder_htlcs = self.current_holder_commitment_tx.htlc_outputs.iter()
.map(|&(ref a, _, ref b)| (a, b.as_ref()));

let current_counterparty_htlcs = if let Some(txid) = self.current_counterparty_commitment_txid {
if let Some(htlc_outputs) = self.counterparty_claimable_outpoints.get(&txid) {
Some(htlc_outputs.iter().map(|&(ref a, ref b)| (a, b.as_ref().map(|boxed| &**boxed))))
} else { None }
} else { None }.into_iter().flatten();

let prev_counterparty_htlcs = if let Some(txid) = self.prev_counterparty_commitment_txid {
if let Some(htlc_outputs) = self.counterparty_claimable_outpoints.get(&txid) {
Some(htlc_outputs.iter().map(|&(ref a, ref b)| (a, b.as_ref().map(|boxed| &**boxed))))
} else { None }
} else { None }.into_iter().flatten();

let htlcs = current_holder_htlcs
.chain(current_counterparty_htlcs)
.chain(prev_counterparty_htlcs);

let height = self.best_block.height;
for (htlc, source_opt) in htlcs {
// Only check forwarded HTLCs' previous hops
let source = match source_opt {
Some(source) => source,
None => continue,
};
let inbound_htlc_expiry = match source.inbound_htlc_expiry() {
Some(cltv_expiry) => cltv_expiry,
None => continue,
};
let max_expiry_height = height.saturating_add(LATENCY_GRACE_PERIOD_BLOCKS);
if inbound_htlc_expiry > max_expiry_height {
continue;
}
let duplicate_event = self.pending_monitor_events.iter().any(
|update| if let &MonitorEvent::HTLCEvent(ref upd) = update {
upd.source == *source
} else { false });
if duplicate_event {
continue;
}
if !self.failed_back_htlc_ids.insert(SentHTLCId::from_source(source)) {
continue;
}
if !duplicate_event {
log_error!(logger, "Failing back HTLC {} upstream to preserve the \
channel as the forward HTLC hasn't resolved and our backward HTLC \
expires soon at {}", log_bytes!(htlc.payment_hash.0), inbound_htlc_expiry);
self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
source: source.clone(),
payment_preimage: None,
payment_hash: htlc.payment_hash,
htlc_value_satoshis: Some(htlc.amount_msat / 1000),
}));
}
}
}

Contributor: Nit: is this really considered an error? Info/warn seems better suited.

Collaborator (Author): It's an error in that a violation of our assumptions around tx confirmation time happened, and it's probably an important enough case that users should see it and think hard about what is happening.

let conf_target = self.closure_conf_target();
self.onchain_tx_handler.update_claims_view_from_requests(claimable_outpoints, conf_height, self.best_block.height, broadcaster, conf_target, fee_estimator, logger);
self.onchain_tx_handler.update_claims_view_from_matched_txn(&txn_matched, conf_height, conf_hash, self.best_block.height, broadcaster, conf_target, fee_estimator, logger);
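For readers following the dedup logic in the hunk above: the monitor relies on two layers of duplicate suppression, one against events still sitting in `pending_monitor_events` and one (the new `failed_back_htlc_ids` set) against events already drained by the consumer in an earlier block scan. A minimal standalone sketch of that pattern, not LDK code — the types below are stand-ins:

use std::collections::HashSet;

// Stand-in for LDK's SentHTLCId: any cheap, hashable identifier for the upstream HTLC.
#[derive(Clone, PartialEq, Eq, Hash)]
struct HtlcId(u64);

struct MonitorSketch {
    pending_failure_events: Vec<HtlcId>, // drained by the event consumer between block scans
    failed_back_ids: HashSet<HtlcId>,    // in-memory only; never serialized
}

impl MonitorSketch {
    /// Queue a fail-back event at most once per HTLC, even across repeated block scans.
    fn maybe_queue_fail_back(&mut self, id: HtlcId) {
        // Layer 1: an event for this HTLC is still queued from a recent scan.
        if self.pending_failure_events.contains(&id) {
            return;
        }
        // Layer 2: an event was already generated and handed off in an earlier scan.
        if !self.failed_back_ids.insert(id.clone()) {
            return;
        }
        self.pending_failure_events.push(id);
    }
}

fn main() {
    let mut m = MonitorSketch { pending_failure_events: Vec::new(), failed_back_ids: HashSet::new() };
    m.maybe_queue_fail_back(HtlcId(1));
    m.pending_failure_events.clear(); // consumer drains the event
    m.maybe_queue_fail_back(HtlcId(1)); // later scan: suppressed by the HashSet
    assert!(m.pending_failure_events.is_empty());
}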
@@ -5066,6 +5139,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP
counterparty_node_id,
initial_counterparty_commitment_info,
balances_empty_height,
failed_back_htlc_ids: new_hash_set(),
})))
}
}
158 changes: 150 additions & 8 deletions lightning/src/ln/functional_tests.rs
@@ -2270,6 +2270,138 @@ fn channel_reserve_in_flight_removes() {
claim_payment(&nodes[0], &[&nodes[1]], payment_preimage_3);
}

enum PostFailBackAction {
TimeoutOnChain,
ClaimOnChain,
FailOffChain,
ClaimOffChain,
}

#[test]
fn test_fail_back_before_backwards_timeout() {
do_test_fail_back_before_backwards_timeout(PostFailBackAction::TimeoutOnChain);
do_test_fail_back_before_backwards_timeout(PostFailBackAction::ClaimOnChain);
do_test_fail_back_before_backwards_timeout(PostFailBackAction::FailOffChain);
do_test_fail_back_before_backwards_timeout(PostFailBackAction::ClaimOffChain);
}

fn do_test_fail_back_before_backwards_timeout(post_fail_back_action: PostFailBackAction) {
// Test that we fail an HTLC upstream if we are still waiting for confirmation downstream
// just before the upstream timeout expires
let chanmon_cfgs = create_chanmon_cfgs(3);
let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
let nodes = create_network(3, &node_cfgs, &node_chanmgrs);
for node in nodes.iter() {
*node.fee_estimator.sat_per_kw.lock().unwrap() = 2000;
}

let node_b_id = nodes[1].node.get_our_node_id();
let node_c_id = nodes[2].node.get_our_node_id();

create_announced_chan_between_nodes(&nodes, 0, 1);
let chan_2 = create_announced_chan_between_nodes(&nodes, 1, 2);

// Start every node on the same block height to make reasoning about timeouts easier
connect_blocks(&nodes[0], 2*CHAN_CONFIRM_DEPTH + 1 - nodes[0].best_block_info().1);
connect_blocks(&nodes[1], 2*CHAN_CONFIRM_DEPTH + 1 - nodes[1].best_block_info().1);
connect_blocks(&nodes[2], 2*CHAN_CONFIRM_DEPTH + 1 - nodes[2].best_block_info().1);

let (payment_preimage, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 3_000_000);

// Force close the B<->C channel by timing out the HTLC
let timeout_blocks = TEST_FINAL_CLTV + LATENCY_GRACE_PERIOD_BLOCKS + 1;
connect_blocks(&nodes[1], timeout_blocks);
let node_1_txn = test_txn_broadcast(&nodes[1], &chan_2, None, HTLCType::TIMEOUT);
check_closed_event(&nodes[1], 1, ClosureReason::HTLCsTimedOut, false, &[node_c_id], 100_000);
check_closed_broadcast(&nodes[1], 1, true);
check_added_monitors(&nodes[1], 1);

// After the A<->B HTLC gets within LATENCY_GRACE_PERIOD_BLOCKS we will fail the HTLC to avoid
// the channel force-closing. Note that we already connected `TEST_FINAL_CLTV +
// LATENCY_GRACE_PERIOD_BLOCKS` blocks above, so we subtract that from the HTLC expiry (which
// is `TEST_FINAL_CLTV` + `MIN_CLTV_EXPIRY_DELTA`).
let upstream_timeout_blocks = MIN_CLTV_EXPIRY_DELTA as u32 - LATENCY_GRACE_PERIOD_BLOCKS * 2;
connect_blocks(&nodes[1], upstream_timeout_blocks);
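To make the block-count arithmetic in the comment above concrete, here is a rough sanity check using assumed values for the test constants (TEST_FINAL_CLTV, MIN_CLTV_EXPIRY_DELTA, and LATENCY_GRACE_PERIOD_BLOCKS may differ across LDK versions), with off-by-one details around the exact expiry height glossed over:

// A sketch, not part of the test: assumed constant values for illustration only.
const TEST_FINAL_CLTV: u32 = 70;
const MIN_CLTV_EXPIRY_DELTA: u32 = 72;
const LATENCY_GRACE_PERIOD_BLOCKS: u32 = 3;

fn main() {
    // Upstream (A<->B) HTLC expiry, relative to the height at which the payment was routed.
    let upstream_expiry = TEST_FINAL_CLTV + MIN_CLTV_EXPIRY_DELTA; // 142
    // Blocks connected on node B so far: enough to force-close B<->C, then enough to
    // approach the upstream timeout.
    let timeout_blocks = TEST_FINAL_CLTV + LATENCY_GRACE_PERIOD_BLOCKS + 1; // 74
    let upstream_timeout_blocks = MIN_CLTV_EXPIRY_DELTA - LATENCY_GRACE_PERIOD_BLOCKS * 2; // 66
    let connected = timeout_blocks + upstream_timeout_blocks; // 140
    // The upstream expiry now falls within the grace period, so the monitor fails the HTLC back...
    assert!(upstream_expiry <= connected + LATENCY_GRACE_PERIOD_BLOCKS);
    // ...while the expiry itself has not yet been reached, so A<->B need not be force-closed.
    assert!(connected < upstream_expiry);
}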

// Connect blocks for nodes[0] to make sure they don't go on-chain
connect_blocks(&nodes[0], timeout_blocks + upstream_timeout_blocks);

// Check that nodes[1] fails the HTLC upstream
expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[1],
vec![HTLCDestination::NextHopChannel {
node_id: Some(nodes[2].node.get_our_node_id()),
channel_id: chan_2.2
}]);
check_added_monitors!(nodes[1], 1);
let htlc_updates = get_htlc_update_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
let msgs::CommitmentUpdate { update_fail_htlcs, commitment_signed, .. } = htlc_updates;

nodes[0].node.handle_update_fail_htlc(nodes[1].node.get_our_node_id(), &update_fail_htlcs[0]);
commitment_signed_dance!(nodes[0], nodes[1], commitment_signed, false);
expect_payment_failed_conditions(&nodes[0], payment_hash, false,
PaymentFailedConditions::new().blamed_chan_closed(true));

// Make sure we handle possible duplicate fails or extra messages after failing back
match post_fail_back_action {
PostFailBackAction::TimeoutOnChain => {
// Confirm nodes[1]'s claim with timeout, make sure we don't fail upstream again
mine_transaction(&nodes[1], &node_1_txn[0]); // Commitment
mine_transaction(&nodes[1], &node_1_txn[1]); // HTLC timeout
connect_blocks(&nodes[1], ANTI_REORG_DELAY);
// Expect handling another fail back event, but the HTLC is already gone
expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[1],
vec![HTLCDestination::NextHopChannel {
node_id: Some(nodes[2].node.get_our_node_id()),
channel_id: chan_2.2
}]);
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
},

Contributor: We could avoid having this duplicate go out if we check the set on the confirmed timeout path, but I guess it's not worth doing since a restart in between would yield it anyway.

Collaborator (Author): I also spent a while trying to move the event generation to where we do the actual failure so that we could detect the duplicate in ChannelManager, but the changeset kinda blew up on me :/. And, yeah, this should be a really rare/never kind of case, so duplicate handling-failed events skewing users' forwarding statistics is probably not the end of the world.
PostFailBackAction::ClaimOnChain => {
nodes[2].node.claim_funds(payment_preimage);
expect_payment_claimed!(nodes[2], payment_hash, 3_000_000);
check_added_monitors!(nodes[2], 1);
get_htlc_update_msgs(&nodes[2], &nodes[1].node.get_our_node_id());

connect_blocks(&nodes[2], TEST_FINAL_CLTV - CLTV_CLAIM_BUFFER + 2);
let node_2_txn = test_txn_broadcast(&nodes[2], &chan_2, None, HTLCType::SUCCESS);
check_closed_broadcast!(nodes[2], true);
check_closed_event(&nodes[2], 1, ClosureReason::HTLCsTimedOut, false, &[node_b_id], 100_000);
check_added_monitors!(nodes[2], 1);

mine_transaction(&nodes[1], &node_2_txn[0]); // Commitment
mine_transaction(&nodes[1], &node_2_txn[1]); // HTLC success
connect_blocks(&nodes[1], ANTI_REORG_DELAY);
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
},
PostFailBackAction::FailOffChain => {
nodes[2].node.fail_htlc_backwards(&payment_hash);
expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[2],
vec![HTLCDestination::FailedPayment { payment_hash }]);
check_added_monitors!(nodes[2], 1);
let commitment_update = get_htlc_update_msgs(&nodes[2], &nodes[1].node.get_our_node_id());
let update_fail = commitment_update.update_fail_htlcs[0].clone();

nodes[1].node.handle_update_fail_htlc(nodes[2].node.get_our_node_id(), &update_fail);
let err_msg = get_err_msg(&nodes[1], &nodes[2].node.get_our_node_id());
assert_eq!(err_msg.channel_id, chan_2.2);
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
},
PostFailBackAction::ClaimOffChain => {
nodes[2].node.claim_funds(payment_preimage);
expect_payment_claimed!(nodes[2], payment_hash, 3_000_000);
check_added_monitors!(nodes[2], 1);
let commitment_update = get_htlc_update_msgs(&nodes[2], &nodes[1].node.get_our_node_id());
let update_fulfill = commitment_update.update_fulfill_htlcs[0].clone();

nodes[1].node.handle_update_fulfill_htlc(nodes[2].node.get_our_node_id(), &update_fulfill);
let err_msg = get_err_msg(&nodes[1], &nodes[2].node.get_our_node_id());
assert_eq!(err_msg.channel_id, chan_2.2);
assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
},
};
}
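The reviewer exchange in the TimeoutOnChain arm above accepts that a duplicate fail-back event can be emitted (for example after a restart, since `failed_back_htlc_ids` is never persisted). A minimal sketch of how an event consumer could keep its forwarding-failure statistics idempotent despite re-delivery; the `FailureKey` type and fields here are hypothetical stand-ins, not LDK's actual event API:

use std::collections::HashSet;

// Hypothetical key identifying one failed forward: (inbound channel, payment hash).
#[derive(Clone, PartialEq, Eq, Hash)]
struct FailureKey {
    prev_channel_id: [u8; 32],
    payment_hash: [u8; 32],
}

#[derive(Default)]
struct ForwardingStats {
    failed_forwards: u64,
    seen: HashSet<FailureKey>,
}

impl ForwardingStats {
    /// Count each failed forward at most once, even if the failure event is re-delivered.
    fn record_failure(&mut self, key: FailureKey) {
        if self.seen.insert(key) {
            self.failed_forwards += 1;
        }
    }
}

fn main() {
    let mut stats = ForwardingStats::default();
    let key = FailureKey { prev_channel_id: [1; 32], payment_hash: [2; 32] };
    stats.record_failure(key.clone());
    stats.record_failure(key); // duplicate delivery after a restart: ignored
    assert_eq!(stats.failed_forwards, 1);
}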

#[test]
fn channel_monitor_network_test() {
// Simple test which builds a network of ChannelManagers, connects them to each other, and
@@ -2374,7 +2506,7 @@ fn channel_monitor_network_test() {
let node2_commitment_txid;
{
let node_txn = test_txn_broadcast(&nodes[2], &chan_3, None, HTLCType::NONE);
connect_blocks(&nodes[2], TEST_FINAL_CLTV + LATENCY_GRACE_PERIOD_BLOCKS + MIN_CLTV_EXPIRY_DELTA as u32 + 1);
connect_blocks(&nodes[2], TEST_FINAL_CLTV + LATENCY_GRACE_PERIOD_BLOCKS);
test_txn_broadcast(&nodes[2], &chan_3, None, HTLCType::TIMEOUT);
node2_commitment_txid = node_txn[0].compute_txid();

@@ -3312,8 +3444,8 @@ fn do_test_htlc_on_chain_timeout(connect_style: ConnectStyle) {
// Broadcast timeout transaction by B on received output from C's commitment tx on B's chain
// Verify that B's ChannelManager is able to detect that HTLC is timeout by its own tx and react backward in consequence
mine_transaction(&nodes[1], &commitment_tx[0]);
check_closed_event!(&nodes[1], 1, ClosureReason::CommitmentTxConfirmed, false
, [nodes[2].node.get_our_node_id()], 100000);
check_closed_event(&nodes[1], 1, ClosureReason::CommitmentTxConfirmed, false,
&[nodes[2].node.get_our_node_id()], 100000);
let htlc_expiry = get_monitor!(nodes[1], chan_2.2).get_claimable_balances().iter().filter_map(|bal|
if let Balance::MaybeTimeoutClaimableHTLC { claimable_height, .. } = bal {
Some(*claimable_height)
@@ -9774,6 +9906,8 @@ fn do_test_tx_confirmed_skipping_blocks_immediate_broadcast(test_height_before_t
let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
*nodes[0].connect_style.borrow_mut() = ConnectStyle::BestBlockFirstSkippingBlocks;

let node_c_id = nodes[2].node.get_our_node_id();

create_announced_chan_between_nodes(&nodes, 0, 1);
let (chan_announce, _, channel_id, _) = create_announced_chan_between_nodes(&nodes, 1, 2);
let (_, payment_hash, ..) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);
@@ -9789,7 +9923,7 @@ fn do_test_tx_confirmed_skipping_blocks_immediate_broadcast(test_height_before_t

let conf_height = nodes[1].best_block_info().1;
if !test_height_before_timelock {
connect_blocks(&nodes[1], 24 * 6);
connect_blocks(&nodes[1], TEST_FINAL_CLTV - LATENCY_GRACE_PERIOD_BLOCKS);
}
nodes[1].chain_monitor.chain_monitor.transactions_confirmed(
&nodes[1].get_block_header(conf_height), &[(0, &node_txn[0])], conf_height);
@@ -9808,10 +9942,6 @@ fn do_test_tx_confirmed_skipping_blocks_immediate_broadcast(test_height_before_t
&spending_txn[0]
};
check_spends!(htlc_tx, node_txn[0]);
// We should also generate a SpendableOutputs event with the to_self output (as its
// timelock is up).
let descriptor_spend_txn = check_spendable_outputs!(nodes[1], node_cfgs[1].keys_manager);
assert_eq!(descriptor_spend_txn.len(), 1);

// If we also discover that the HTLC-Timeout transaction was confirmed some time ago, we
// should immediately fail-backwards the HTLC to the previous hop, without waiting for an
@@ -9830,6 +9960,18 @@ fn do_test_tx_confirmed_skipping_blocks_immediate_broadcast(test_height_before_t
nodes[0].node.handle_update_fail_htlc(nodes[1].node.get_our_node_id(), &updates.update_fail_htlcs[0]);
commitment_signed_dance!(nodes[0], nodes[1], updates.commitment_signed, true, true);
expect_payment_failed_with_update!(nodes[0], payment_hash, false, chan_announce.contents.short_channel_id, true);

// We should also generate a SpendableOutputs event with the to_self output (once the
// timelock is up).
connect_blocks(&nodes[1], (BREAKDOWN_TIMEOUT as u32) - TEST_FINAL_CLTV + LATENCY_GRACE_PERIOD_BLOCKS - 1);
let descriptor_spend_txn = check_spendable_outputs!(nodes[1], node_cfgs[1].keys_manager);
assert_eq!(descriptor_spend_txn.len(), 1);

// When the HTLC times out on the A<->B edge, the B<->C channel will fail the HTLC back to
// avoid the A<->B channel closing (even though it already has). This will generate a
// spurious HTLCHandlingFailed event.
expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[1],
vec![HTLCDestination::NextHopChannel { node_id: Some(node_c_id), channel_id }]);
}
}
