@@ -507,19 +507,19 @@ struct ClaimablePayments {
 /// running normally, and specifically must be processed before any other non-background
 /// [`ChannelMonitorUpdate`]s are applied.
 enum BackgroundEvent {
-	/// Handle a ChannelMonitorUpdate which closes the channel. This is only separated from
-	/// [`Self::MonitorUpdateRegeneratedOnStartup`] as the maybe-non-closing variant needs a public
-	/// key to handle channel resumption, whereas if the channel has been force-closed we do not
-	/// need the counterparty node_id.
+	/// Handle a ChannelMonitorUpdate which closes the channel or for an already-closed channel.
+	/// This is only separated from [`Self::MonitorUpdateRegeneratedOnStartup`] as the
+	/// maybe-non-closing variant needs a public key to handle channel resumption, whereas if the
+	/// channel has been force-closed we do not need the counterparty node_id.
 	///
 	/// Note that any such events are lost on shutdown, so in general they must be updates which
 	/// are regenerated on startup.
-	ClosingMonitorUpdateRegeneratedOnStartup((OutPoint, ChannelMonitorUpdate)),
+	ClosedMonitorUpdateRegeneratedOnStartup((OutPoint, ChannelMonitorUpdate)),
 	/// Handle a ChannelMonitorUpdate which may or may not close the channel and may unblock the
 	/// channel to continue normal operation.
 	///
 	/// In general this should be used rather than
-	/// [`Self::ClosingMonitorUpdateRegeneratedOnStartup`], however in cases where the
+	/// [`Self::ClosedMonitorUpdateRegeneratedOnStartup`], however in cases where the
 	/// `counterparty_node_id` is not available as the channel has closed from a [`ChannelMonitor`]
 	/// error the other variant is acceptable.
 	///
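
For orientation, the two variants touched by this patch have roughly the shapes sketched below. This is a hedged reconstruction from this hunk plus the `MonitorUpdateRegeneratedOnStartup` push further down in the diff, with the lightning/secp256k1 types stubbed out so the sketch stands alone; it is not a copy of the full enum in the source file.

```rust
// Stub types standing in for lightning's OutPoint and ChannelMonitorUpdate and
// secp256k1's PublicKey; only the variant shapes matter for this sketch.
struct OutPoint;
struct ChannelMonitorUpdate;
struct PublicKey;

enum BackgroundEvent {
	/// The channel is already (force-)closed, so no counterparty node_id is needed:
	/// the update only has to reach the ChannelMonitor.
	ClosedMonitorUpdateRegeneratedOnStartup((OutPoint, ChannelMonitorUpdate)),
	/// The channel may resume normal operation, so the counterparty's public key is
	/// carried along to allow channel resumption.
	MonitorUpdateRegeneratedOnStartup {
		counterparty_node_id: PublicKey,
		funding_txo: OutPoint,
		update: ChannelMonitorUpdate,
	},
}

fn main() {
	// Shape-only sketch; constructing one value of each form just to show them side by side.
	let _closed = BackgroundEvent::ClosedMonitorUpdateRegeneratedOnStartup((OutPoint, ChannelMonitorUpdate));
	let _open = BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
		counterparty_node_id: PublicKey,
		funding_txo: OutPoint,
		update: ChannelMonitorUpdate,
	};
}
```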
@@ -1114,7 +1114,6 @@ where
 	/// Notifier the lock contains sends out a notification when the lock is released.
 	total_consistency_lock: RwLock<()>,
 
-	#[cfg(debug_assertions)]
 	background_events_processed_since_startup: AtomicBool,
 
 	persistence_notifier: Notifier,
@@ -1917,9 +1916,7 @@ macro_rules! handle_new_monitor_update {
 		// update_maps_on_chan_removal needs to be able to take id_to_peer, so make sure we can in
 		// any case so that it won't deadlock.
 		debug_assert_ne!($self.id_to_peer.held_by_thread(), LockHeldState::HeldByThread);
-		#[cfg(debug_assertions)] {
-			debug_assert!($self.background_events_processed_since_startup.load(Ordering::Acquire));
-		}
+		debug_assert!($self.background_events_processed_since_startup.load(Ordering::Acquire));
 		match $update_res {
 			ChannelMonitorUpdateStatus::InProgress => {
 				log_debug!($self.logger, "ChannelMonitor update for {} in flight, holding messages until the update completes.",
@@ -2111,7 +2108,6 @@ where
 			pending_events_processor: AtomicBool::new(false),
 			pending_background_events: Mutex::new(Vec::new()),
 			total_consistency_lock: RwLock::new(()),
-			#[cfg(debug_assertions)]
 			background_events_processed_since_startup: AtomicBool::new(false),
 			persistence_notifier: Notifier::new(),
 
@@ -4150,7 +4146,6 @@ where
 	fn process_background_events(&self) -> NotifyOption {
 		debug_assert_ne!(self.total_consistency_lock.held_by_thread(), LockHeldState::NotHeldByThread);
 
-		#[cfg(debug_assertions)]
 		self.background_events_processed_since_startup.store(true, Ordering::Release);
 
 		let mut background_events = Vec::new();
@@ -4161,7 +4156,7 @@
 
 		for event in background_events.drain(..) {
 			match event {
-				BackgroundEvent::ClosingMonitorUpdateRegeneratedOnStartup((funding_txo, update)) => {
+				BackgroundEvent::ClosedMonitorUpdateRegeneratedOnStartup((funding_txo, update)) => {
 					// The channel has already been closed, so no use bothering to care about the
 					// monitor updating completing.
 					let _ = self.chain_monitor.update_channel(funding_txo, &update);
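
The body of `process_background_events` is only partially visible in this hunk. Below is a minimal, self-contained sketch of the drain pattern it appears to use: take the queued events out from under the `Mutex`, then handle them without holding the lock. The `mem::swap` step is an assumption, since the line that empties the queue is not shown in the diff.

```rust
use std::sync::Mutex;

// Sketch only: `u32` stands in for BackgroundEvent.
struct Manager {
	pending_background_events: Mutex<Vec<u32>>,
}

impl Manager {
	fn process_background_events(&self) {
		let mut background_events = Vec::new();
		// Assumed detail: move the queue out under the lock so events are handled
		// without holding the mutex.
		std::mem::swap(&mut *self.pending_background_events.lock().unwrap(), &mut background_events);
		for event in background_events.drain(..) {
			// Handle `event` here (e.g. apply the queued ChannelMonitorUpdate).
			let _ = event;
		}
	}
}

fn main() {
	let mgr = Manager { pending_background_events: Mutex::new(vec![1, 2, 3]) };
	mgr.process_background_events();
	assert!(mgr.pending_background_events.lock().unwrap().is_empty());
}
```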
@@ -4741,6 +4736,11 @@ where
 	-> Result<(), (PublicKey, MsgHandleErrInternal)> {
 		//TODO: Delay the claimed_funds relaying just like we do outbound relay!
 
+		// If we haven't yet run background events assume we're still deserializing and shouldn't
+		// actually pass `ChannelMonitorUpdate`s to users yet. Instead, queue them up as
+		// `BackgroundEvent`s.
+		let during_init = !self.background_events_processed_since_startup.load(Ordering::Acquire);
+
 		{
 			let per_peer_state = self.per_peer_state.read().unwrap();
 			let chan_id = prev_hop.outpoint.to_channel_id();
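
The `during_init` check above, the `store` in `process_background_events`, and the now-unconditional `AtomicBool` field together form a simple startup gate. A minimal self-contained sketch of that pattern (names reused for readability; this is not the crate's code):

```rust
use std::sync::atomic::{AtomicBool, Ordering};

struct StartupGate {
	// False from deserialization until the first background-event pass. This is also why the
	// field can no longer be #[cfg(debug_assertions)]-gated: it is now read at runtime.
	background_events_processed_since_startup: AtomicBool,
}

impl StartupGate {
	fn process_background_events(&self) {
		// ...replay any queued updates here, then mark startup processing as done.
		self.background_events_processed_since_startup.store(true, Ordering::Release);
	}

	fn claim_path(&self) -> &'static str {
		let during_init = !self.background_events_processed_since_startup.load(Ordering::Acquire);
		if during_init {
			"queue the ChannelMonitorUpdate as a BackgroundEvent"
		} else {
			"hand the ChannelMonitorUpdate to the chain monitor immediately"
		}
	}
}

fn main() {
	let gate = StartupGate { background_events_processed_since_startup: AtomicBool::new(false) };
	assert_eq!(gate.claim_path(), "queue the ChannelMonitorUpdate as a BackgroundEvent");
	gate.process_background_events();
	assert_eq!(gate.claim_path(), "hand the ChannelMonitorUpdate to the chain monitor immediately");
}
```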
@@ -4767,14 +4767,26 @@
 						log_bytes!(chan_id), action);
 					peer_state.monitor_update_blocked_actions.entry(chan_id).or_insert(Vec::new()).push(action);
 				}
-				let res = handle_new_monitor_update!(self, prev_hop.outpoint, monitor_update, peer_state_lock,
-					peer_state, per_peer_state, chan);
-				if let Err(e) = res {
-					// TODO: This is a *critical* error - we probably updated the outbound edge
-					// of the HTLC's monitor with a preimage. We should retry this monitor
-					// update over and over again until morale improves.
-					log_error!(self.logger, "Failed to update channel monitor with preimage {:?}", payment_preimage);
-					return Err((counterparty_node_id, e));
+				if !during_init {
+					let res = handle_new_monitor_update!(self, prev_hop.outpoint, monitor_update, peer_state_lock,
+						peer_state, per_peer_state, chan);
+					if let Err(e) = res {
+						// TODO: This is a *critical* error - we probably updated the outbound edge
+						// of the HTLC's monitor with a preimage. We should retry this monitor
+						// update over and over again until morale improves.
+						log_error!(self.logger, "Failed to update channel monitor with preimage {:?}", payment_preimage);
+						return Err((counterparty_node_id, e));
+					}
+				} else {
+					// If we're running during init we cannot update a monitor directly -
+					// they probably haven't actually been loaded yet. Instead, push the
+					// monitor update as a background event.
+					self.pending_background_events.lock().unwrap().push(
+						BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
+							counterparty_node_id,
+							funding_txo: prev_hop.outpoint,
+							update: monitor_update.clone(),
+						});
 				}
 			}
 			return Ok(());
@@ -4787,16 +4799,34 @@ where
 				payment_preimage,
 			}],
 		};
-		// We update the ChannelMonitor on the backward link, after
-		// receiving an `update_fulfill_htlc` from the forward link.
-		let update_res = self.chain_monitor.update_channel(prev_hop.outpoint, &preimage_update);
-		if update_res != ChannelMonitorUpdateStatus::Completed {
-			// TODO: This needs to be handled somehow - if we receive a monitor update
-			// with a preimage we *must* somehow manage to propagate it to the upstream
-			// channel, or we must have an ability to receive the same event and try
-			// again on restart.
-			log_error!(self.logger, "Critical error: failed to update channel monitor with preimage {:?}: {:?}",
-				payment_preimage, update_res);
+
+		if !during_init {
+			// We update the ChannelMonitor on the backward link, after
+			// receiving an `update_fulfill_htlc` from the forward link.
+			let update_res = self.chain_monitor.update_channel(prev_hop.outpoint, &preimage_update);
+			if update_res != ChannelMonitorUpdateStatus::Completed {
+				// TODO: This needs to be handled somehow - if we receive a monitor update
+				// with a preimage we *must* somehow manage to propagate it to the upstream
+				// channel, or we must have an ability to receive the same event and try
+				// again on restart.
+				log_error!(self.logger, "Critical error: failed to update channel monitor with preimage {:?}: {:?}",
+					payment_preimage, update_res);
+			}
+		} else {
+			// If we're running during init we cannot update a monitor directly - they probably
+			// haven't actually been loaded yet. Instead, push the monitor update as a background
+			// event.
+			// Note that while it's safe to use `ClosedMonitorUpdateRegeneratedOnStartup` here (the
+			// channel is already closed) we need to ultimately handle the monitor update
+			// completion action only after we've completed the monitor update. This is the only
+			// way to guarantee this update *will* be regenerated on startup (otherwise if this was
+			// from a forwarded HTLC the downstream preimage may be deleted before we claim
+			// upstream). Thus, we need to transition to some new `BackgroundEvent` type which will
+			// complete the monitor update completion action from `completion_action`.
+			self.pending_background_events.lock().unwrap().push(
+				BackgroundEvent::ClosedMonitorUpdateRegeneratedOnStartup((
+					prev_hop.outpoint, preimage_update,
+				)));
 		}
 		// Note that we do process the completion action here. This totally could be a
 		// duplicate claim, but we have no way of knowing without interrogating the
@@ -4814,6 +4844,8 @@ where
 	fn claim_funds_internal(&self, source: HTLCSource, payment_preimage: PaymentPreimage, forwarded_htlc_value_msat: Option<u64>, from_onchain: bool, next_channel_id: [u8; 32]) {
 		match source {
 			HTLCSource::OutboundRoute { session_priv, payment_id, path, .. } => {
+				debug_assert!(self.background_events_processed_since_startup.load(Ordering::Acquire),
+					"We don't support claim_htlc claims during startup - monitors may not be available yet");
 				self.pending_outbound_payments.claim_htlc(payment_id, payment_preimage, session_priv, path, from_onchain, &self.pending_events, &self.logger);
 			},
 			HTLCSource::PreviousHopData(hop_data) => {
@@ -5748,7 +5780,7 @@ where
 	}
 
 	/// Checks whether [`ChannelMonitorUpdate`]s generated by the receipt of a remote
-	/// [`msgs::RevokeAndACK`] should be held for the given channel until some other event
+	/// [`msgs::RevokeAndACK`] should be held for the given channel until some other action
 	/// completes. Note that this needs to happen in the same [`PeerState`] mutex as any release of
 	/// the [`ChannelMonitorUpdate`] in question.
 	fn raa_monitor_updates_held(&self,
@@ -6365,7 +6397,7 @@ where
 	/// When something which was blocking a channel from updating its [`ChannelMonitor`] (e.g. an
 	/// [`Event`] being handled) completes, this should be called to restore the channel to normal
 	/// operation. It will double-check that nothing *else* is also blocking the same channel from
-	/// making progress and then any blocked [`ChannelMonitorUpdate`]s fly.
+	/// making progress and then let any blocked [`ChannelMonitorUpdate`]s fly.
 	fn handle_monitor_update_release(&self, counterparty_node_id: PublicKey, channel_funding_outpoint: OutPoint, mut completed_blocker: Option<RAAMonitorUpdateBlockingAction>) {
 		let mut errors = Vec::new();
 		loop {
@@ -8294,7 +8326,7 @@ where
 					update_id: CLOSED_CHANNEL_UPDATE_ID,
 					updates: vec![ChannelMonitorUpdateStep::ChannelForceClosed { should_broadcast: true }],
 				};
-				close_background_events.push(BackgroundEvent::ClosingMonitorUpdateRegeneratedOnStartup((*funding_txo, monitor_update)));
+				close_background_events.push(BackgroundEvent::ClosedMonitorUpdateRegeneratedOnStartup((*funding_txo, monitor_update)));
 			}
 		}
 
@@ -8549,6 +8581,11 @@ where
 		// Note that we have to do the above replays before we push new monitor updates.
 		pending_background_events.append(&mut close_background_events);
 
+		// If there's any preimages for forwarded HTLCs hanging around in ChannelMonitors we
+		// should ensure we try them again on the inbound edge. We put them here and do so after we
+		// have a fully-constructed `ChannelManager` at the end.
+		let mut pending_claims_to_replay = Vec::new();
+
 		{
 			// If we're tracking pending payments, ensure we haven't lost any by looking at the
 			// ChannelMonitor data for any channels for which we do not have authorative state
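
The element type of `pending_claims_to_replay` is never written out in the diff. Piecing it together from the push inside the monitor loop below and the destructuring at the replay call near the end, each entry is roughly the tuple sketched here (lightning types stubbed out so the sketch stands alone; this is not the crate's own definition):

```rust
// Stub types standing in for lightning's HTLCSource and PaymentPreimage.
struct HTLCSource;
struct PaymentPreimage([u8; 32]);

/// (HTLC source on the upstream edge, payment preimage, downstream HTLC value in msat,
///  whether the downstream channel appears closed, downstream channel id)
type PendingClaimToReplay = (HTLCSource, PaymentPreimage, u64, bool, [u8; 32]);

fn main() {
	let mut pending_claims_to_replay: Vec<PendingClaimToReplay> = Vec::new();
	// Populated while walking the ChannelMonitors, then drained after the ChannelManager is
	// fully constructed by calling claim_funds_internal for each entry (see the final hunk).
	pending_claims_to_replay.push((HTLCSource, PaymentPreimage([0; 32]), 1_000, false, [0; 32]));
	assert_eq!(pending_claims_to_replay.len(), 1);
}
```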
@@ -8559,7 +8596,8 @@ where
 			// We only rebuild the pending payments map if we were most recently serialized by
 			// 0.0.102+
 			for (_, monitor) in args.channel_monitors.iter() {
-				if id_to_peer.get(&monitor.get_funding_txo().0.to_channel_id()).is_none() {
+				let counterparty_opt = id_to_peer.get(&monitor.get_funding_txo().0.to_channel_id());
+				if counterparty_opt.is_none() {
 					for (htlc_source, (htlc, _)) in monitor.get_pending_or_resolved_outbound_htlcs() {
 						if let HTLCSource::OutboundRoute { payment_id, session_priv, path, .. } = htlc_source {
 							if path.hops.is_empty() {
@@ -8653,6 +8691,33 @@
 						}
 					}
 				}
+
+				// Whether the downstream channel was closed or not, try to re-apply any payment
+				// preimages from it which may be needed in upstream channels for forwarded
+				// payments.
+				let outbound_claimed_htlcs_iter = monitor.get_all_current_outbound_htlcs()
+					.into_iter()
+					.filter_map(|(htlc_source, (htlc, preimage_opt))| {
+						if let HTLCSource::PreviousHopData(_) = htlc_source {
+							if let Some(payment_preimage) = preimage_opt {
+								Some((htlc_source, payment_preimage, htlc.amount_msat,
+									// Check if `counterparty_opt.is_none()` to see if the
+									// downstream chan is closed (because we don't have a
+									// channel_id -> peer map entry).
+									counterparty_opt.is_none(),
+									monitor.get_funding_txo().0.to_channel_id()))
+							} else { None }
+						} else {
+							// If it was an outbound payment, we've handled it above - if a preimage
+							// came in and we persisted the `ChannelManager` we either handled it and
+							// are good to go or the channel force-closed - we don't have to handle the
+							// channel still live case here.
+							None
+						}
+					});
+				for tuple in outbound_claimed_htlcs_iter {
+					pending_claims_to_replay.push(tuple);
+				}
 			}
 		}
 
@@ -8885,7 +8950,6 @@ where
 			pending_events_processor: AtomicBool::new(false),
 			pending_background_events: Mutex::new(pending_background_events),
 			total_consistency_lock: RwLock::new(()),
-			#[cfg(debug_assertions)]
 			background_events_processed_since_startup: AtomicBool::new(false),
 			persistence_notifier: Notifier::new(),
 
@@ -8904,6 +8968,14 @@ where
 			channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
 		}
 
+		for (source, preimage, downstream_value, downstream_closed, downstream_chan_id) in pending_claims_to_replay {
+			// We use `downstream_closed` in place of `from_onchain` here just as a guess - we
+			// don't remember in the `ChannelMonitor` where we got a preimage from, but if the
+			// channel is closed we just assume that it probably came from an on-chain claim.
+			channel_manager.claim_funds_internal(source, preimage, Some(downstream_value),
+				downstream_closed, downstream_chan_id);
+		}
+
 		//TODO: Broadcast channel update for closed channels, but only after we've made a
 		//connection or two.
 