Skip to content

Commit da85861

Browse files
committed
Re-fail perm-failed HTLCs on startup in case of MonitorEvent loss
`MonitorEvent`s aren't delivered to the `ChannelManager` in a durable fashion - if the `ChannelManager` fetches the pending `MonitorEvent`s, then the `ChannelMonitor` gets persisted (i.e. due to a block update) then the node crashes, prior to persisting the `ChannelManager` again, the `MonitorEvent` and its effects on the `ChannelManager` will be lost. This isn't likely in a sync persist environment, but in an async one this could be an issue. Note that this is only an issue for closed channels - `MonitorEvent`s only inform the `ChannelManager` that a channel is closed (which the `ChannelManager` will learn on startup or when it next tries to advance the channel state), that `ChannelMonitorUpdate` writes completed (which the `ChannelManager` will detect on startup), or that HTLCs resolved on-chain post closure. Of the three, only the last is problematic to lose prior to a reload. In a previous commit we handled the case of claimed HTLCs by replaying payment preimages on startup to avoid `MonitorEvent` loss causing us to miss an HTLC claim. Here we handle the HTLC-failed case similarly. Unlike with HTLC claims via preimage, we don't already have replay logic in `ChannelManager` startup, but it's easy enough to add one. Luckily, we already track when an HTLC reaches permanently-failed state in `ChannelMonitor` (i.e. it has `ANTI_REORG_DELAY` confirmations on-chain on the failing transaction), so all we need to do is add the ability to query for that and fail them on `ChannelManager` startup. Backport of f809e6c Resolved conflicts in: * lightning/src/chain/channelmonitor.rs due to splicing-related changes in the upstream branch, * lightning/src/ln/channelmanager.rs due to lack of the `LocalHTLCFailureReason` type in this branch, and * lightning/src/ln/monitor_tests.rs due to changes to upstream bump events and commitment announcement logic.
1 parent 4b4aad2 commit da85861

File tree

3 files changed

+385
-2
lines changed

3 files changed

+385
-2
lines changed

lightning/src/chain/channelmonitor.rs

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2580,6 +2580,122 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
25802580
res
25812581
}
25822582

2583+
/// Gets the set of outbound HTLCs which hit the chain and ultimately were claimed by us via
2584+
/// the timeout path and reached [`ANTI_REORG_DELAY`] confirmations. This is used to determine
2585+
/// if an HTLC has failed without the `ChannelManager` having seen it prior to being persisted.
2586+
pub(crate) fn get_onchain_failed_outbound_htlcs(&self) -> HashMap<HTLCSource, PaymentHash> {
2587+
let mut res = new_hash_map();
2588+
let us = self.inner.lock().unwrap();
2589+
2590+
// We only want HTLCs with ANTI_REORG_DELAY confirmations, which implies the commitment
2591+
// transaction has least ANTI_REORG_DELAY confirmations for any dependent HTLC transactions
2592+
// to have been confirmed.
2593+
let confirmed_txid = us.funding_spend_confirmed.or_else(|| {
2594+
us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| {
2595+
if let OnchainEvent::FundingSpendConfirmation { .. } = event.event {
2596+
if event.height + ANTI_REORG_DELAY - 1 <= us.best_block.height {
2597+
Some(event.txid)
2598+
} else {
2599+
None
2600+
}
2601+
} else {
2602+
None
2603+
}
2604+
})
2605+
});
2606+
2607+
let confirmed_txid = if let Some(txid) = confirmed_txid {
2608+
txid
2609+
} else {
2610+
return res;
2611+
};
2612+
2613+
macro_rules! walk_htlcs {
2614+
($htlc_iter: expr) => {
2615+
let mut walk_candidate_htlcs = |htlcs| {
2616+
for &(ref candidate_htlc, ref candidate_source) in htlcs {
2617+
let candidate_htlc: &HTLCOutputInCommitment = &candidate_htlc;
2618+
let candidate_source: &Option<Box<HTLCSource>> = &candidate_source;
2619+
2620+
let source: &HTLCSource = if let Some(source) = candidate_source {
2621+
source
2622+
} else {
2623+
continue;
2624+
};
2625+
let confirmed = $htlc_iter.find(|(_, conf_src)| Some(source) == *conf_src);
2626+
if let Some((confirmed_htlc, _)) = confirmed {
2627+
let filter = |v: &&IrrevocablyResolvedHTLC| {
2628+
v.commitment_tx_output_idx
2629+
== confirmed_htlc.transaction_output_index
2630+
};
2631+
2632+
// The HTLC was included in the confirmed commitment transaction, so we
2633+
// need to see if it has been irrevocably failed yet.
2634+
if confirmed_htlc.transaction_output_index.is_none() {
2635+
// Dust HTLCs are always implicitly failed once the commitment
2636+
// transaction reaches ANTI_REORG_DELAY confirmations.
2637+
res.insert(source.clone(), confirmed_htlc.payment_hash);
2638+
} else if let Some(state) =
2639+
us.htlcs_resolved_on_chain.iter().filter(filter).next()
2640+
{
2641+
if state.payment_preimage.is_none() {
2642+
res.insert(source.clone(), confirmed_htlc.payment_hash);
2643+
}
2644+
}
2645+
} else {
2646+
// The HTLC was not included in the confirmed commitment transaction,
2647+
// which has now reached ANTI_REORG_DELAY confirmations and thus the
2648+
// HTLC has been failed.
2649+
res.insert(source.clone(), candidate_htlc.payment_hash);
2650+
}
2651+
}
2652+
};
2653+
2654+
// We walk the set of HTLCs in the unrevoked counterparty commitment transactions (see
2655+
// `fail_unbroadcast_htlcs` for a description of why).
2656+
if let Some(ref txid) = us.current_counterparty_commitment_txid {
2657+
let htlcs = us.counterparty_claimable_outpoints.get(txid);
2658+
walk_candidate_htlcs(htlcs.expect("Missing tx info for latest tx"));
2659+
}
2660+
if let Some(ref txid) = us.prev_counterparty_commitment_txid {
2661+
let htlcs = us.counterparty_claimable_outpoints.get(txid);
2662+
walk_candidate_htlcs(htlcs.expect("Missing tx info for previous tx"));
2663+
}
2664+
};
2665+
}
2666+
2667+
if Some(confirmed_txid) == us.current_counterparty_commitment_txid
2668+
|| Some(confirmed_txid) == us.prev_counterparty_commitment_txid
2669+
{
2670+
let htlcs = us.counterparty_claimable_outpoints.get(&confirmed_txid).unwrap();
2671+
walk_htlcs!(htlcs.iter().filter_map(|(a, b)| {
2672+
if let &Some(ref source) = b {
2673+
Some((a, Some(&**source)))
2674+
} else {
2675+
None
2676+
}
2677+
}));
2678+
} else if confirmed_txid == us.current_holder_commitment_tx.txid {
2679+
let mut htlcs =
2680+
us.current_holder_commitment_tx.htlc_outputs.iter().map(|(a, _, c)| (a, c.as_ref()));
2681+
walk_htlcs!(htlcs);
2682+
} else if let Some(prev_commitment_tx) = &us.prev_holder_signed_commitment_tx {
2683+
if confirmed_txid == prev_commitment_tx.txid {
2684+
let mut htlcs =
2685+
prev_commitment_tx.htlc_outputs.iter().map(|(a, _, c)| (a, c.as_ref()));
2686+
walk_htlcs!(htlcs);
2687+
} else {
2688+
let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
2689+
walk_htlcs!(htlcs_confirmed.iter());
2690+
}
2691+
} else {
2692+
let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
2693+
walk_htlcs!(htlcs_confirmed.iter());
2694+
}
2695+
2696+
res
2697+
}
2698+
25832699
/// Gets the set of outbound HTLCs which are pending resolution in this channel or which were
25842700
/// resolved with a preimage from our counterparty.
25852701
///

lightning/src/ln/channelmanager.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13968,6 +13968,27 @@ where
1396813968
},
1396913969
}
1397013970
}
13971+
for (htlc_source, payment_hash) in monitor.get_onchain_failed_outbound_htlcs() {
13972+
if let Some(node_id) = monitor.get_counterparty_node_id() {
13973+
log_info!(
13974+
args.logger,
13975+
"Failing HTLC with payment hash {} as it was resolved on-chain.",
13976+
payment_hash
13977+
);
13978+
failed_htlcs.push((
13979+
htlc_source,
13980+
payment_hash,
13981+
node_id,
13982+
monitor.channel_id(),
13983+
));
13984+
} else {
13985+
log_warn!(
13986+
args.logger,
13987+
"Unable to fail HTLC with payment hash {} after being resolved on-chain due to incredibly old monitor.",
13988+
payment_hash
13989+
);
13990+
}
13991+
}
1397113992
}
1397213993

1397313994
// Whether the downstream channel was closed or not, try to re-apply any payment
@@ -14554,8 +14575,9 @@ where
1455414575
}
1455514576

1455614577
for htlc_source in failed_htlcs.drain(..) {
14557-
let (source, payment_hash, counterparty_node_id, channel_id) = htlc_source;
14558-
let receiver = HTLCDestination::NextHopChannel { node_id: Some(counterparty_node_id), channel_id };
14578+
let (source, payment_hash, counterparty_id, channel_id) = htlc_source;
14579+
let receiver =
14580+
HTLCDestination::NextHopChannel { node_id: Some(counterparty_id), channel_id };
1455914581
let reason = HTLCFailReason::from_failure_code(0x4000 | 8);
1456014582
channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
1456114583
}

0 commit comments

Comments
 (0)