@@ -3871,44 +3871,68 @@ fn test_single_channel_multiple_mpp() {
38713871 // `ChannelManager::claim_payment` we'd end up hanging waiting to apply a
38723872 // `ChannelMonitorUpdate` until after it completed. See the commit which introduced this test
38733873 // for more info.
3874- let chanmon_cfgs = create_chanmon_cfgs ( 7 ) ;
3875- let node_cfgs = create_node_cfgs ( 7 , & chanmon_cfgs) ;
3876- let node_chanmgrs = create_node_chanmgrs ( 7 , & node_cfgs, & [ None ; 7 ] ) ;
3877- let mut nodes = create_network ( 7 , & node_cfgs, & node_chanmgrs) ;
3878-
3879- let node_5_id = nodes[ 5 ] . node . get_our_node_id ( ) ;
3880- let node_6_id = nodes[ 6 ] . node . get_our_node_id ( ) ;
3881-
3882- // Send an MPP payment in four parts along the path shown from top to bottom
3883- // 0
3884- // 1 2 3 4
3885- // 5
3886- // 6
3874+ let chanmon_cfgs = create_chanmon_cfgs ( 9 ) ;
3875+ let node_cfgs = create_node_cfgs ( 9 , & chanmon_cfgs) ;
3876+ let node_chanmgrs = create_node_chanmgrs ( 9 , & node_cfgs, & [ None ; 9 ] ) ;
3877+ let mut nodes = create_network ( 9 , & node_cfgs, & node_chanmgrs) ;
3878+
3879+ let node_7_id = nodes[ 7 ] . node . get_our_node_id ( ) ;
3880+ let node_8_id = nodes[ 8 ] . node . get_our_node_id ( ) ;
3881+
3882+ // Send an MPP payment in six parts along the path shown from top to bottom
3883+ // 0
3884+ // 1 2 3 4 5 6
3885+ // 7
3886+ // 8
3887+ //
3888+ // We can in theory reproduce this issue with fewer channels/HTLCs, but getting this test
3889+ // robust is rather challenging. We rely on having the main test thread wait on locks held in
3890+ // the background `claim_funds` thread and unlocking when the `claim_funds` thread completes a
3891+ // single `ChannelMonitorUpdate`.
3892+ // This thread calls `get_and_clear_pending_msg_events()` and `handle_revoke_and_ack()`, both
3893+ // of which require `ChannelManager` locks, but we have to make sure this thread gets a chance
3894+ // to be blocked on the mutexes before we let the background thread wake `claim_funds` so that
3895+ // the mutex can switch to this main thread.
3896+ // This relies on our locks being fair, but also on our threads getting runtime during the test
3897+ // run, which can be pretty competitive. Thus we do a dumb dance to be as conservative as
3898+ // possible - we have a background thread which completes a `ChannelMonitorUpdate` (by sending
3899+ // into the `write_blocker` mpsc) but it doesn't run until a mpsc channel sends from this main
3900+ // thread to the background thread, and then we let it sleep a while before we send the
3901+ // `ChannelMonitorUpdate` unblocker.
3902+ // Further, we give ourselves two chances each time, needing 4 HTLCs just to unlock our two
3903+ // `ChannelManager` calls. We then need a few remaining HTLCs to actually trigger the bug, so
3904+ // we use 6 HTLCs.
3905+ const MAX_THREAD_INIT_TIME : std:: time:: Duration = std:: time:: Duration :: from_secs ( 1 ) ;
38873906
38883907 create_announced_chan_between_nodes_with_value ( & nodes, 0 , 1 , 100_000 , 0 ) ;
38893908 create_announced_chan_between_nodes_with_value ( & nodes, 0 , 2 , 100_000 , 0 ) ;
38903909 create_announced_chan_between_nodes_with_value ( & nodes, 0 , 3 , 100_000 , 0 ) ;
38913910 create_announced_chan_between_nodes_with_value ( & nodes, 0 , 4 , 100_000 , 0 ) ;
3892- create_announced_chan_between_nodes_with_value ( & nodes, 1 , 5 , 100_000 , 0 ) ;
3893- create_announced_chan_between_nodes_with_value ( & nodes, 2 , 5 , 100_000 , 0 ) ;
3894- create_announced_chan_between_nodes_with_value ( & nodes, 3 , 5 , 100_000 , 0 ) ;
3895- create_announced_chan_between_nodes_with_value ( & nodes, 4 , 5 , 100_000 , 0 ) ;
3896- create_announced_chan_between_nodes_with_value ( & nodes, 5 , 6 , 1_000_000 , 0 ) ;
3911+ create_announced_chan_between_nodes_with_value ( & nodes, 0 , 5 , 100_000 , 0 ) ;
3912+ create_announced_chan_between_nodes_with_value ( & nodes, 0 , 6 , 100_000 , 0 ) ;
38973913
3898- let ( mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash ! ( & nodes[ 0 ] , nodes[ 6 ] , 30_000_000 ) ;
3914+ create_announced_chan_between_nodes_with_value ( & nodes, 1 , 7 , 100_000 , 0 ) ;
3915+ create_announced_chan_between_nodes_with_value ( & nodes, 2 , 7 , 100_000 , 0 ) ;
3916+ create_announced_chan_between_nodes_with_value ( & nodes, 3 , 7 , 100_000 , 0 ) ;
3917+ create_announced_chan_between_nodes_with_value ( & nodes, 4 , 7 , 100_000 , 0 ) ;
3918+ create_announced_chan_between_nodes_with_value ( & nodes, 5 , 7 , 100_000 , 0 ) ;
3919+ create_announced_chan_between_nodes_with_value ( & nodes, 6 , 7 , 100_000 , 0 ) ;
3920+ create_announced_chan_between_nodes_with_value ( & nodes, 7 , 8 , 1_000_000 , 0 ) ;
38993921
3900- send_along_route_with_secret ( & nodes[ 0 ] , route, & [ & [ & nodes[ 1 ] , & nodes[ 5 ] , & nodes[ 6 ] ] , & [ & nodes[ 2 ] , & nodes[ 5 ] , & nodes[ 6 ] ] , & [ & nodes[ 3 ] , & nodes[ 5 ] , & nodes[ 6 ] ] , & [ & nodes[ 4 ] , & nodes[ 5 ] , & nodes[ 6 ] ] ] , 30_000_000 , payment_hash, payment_secret) ;
3922+ let ( mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash ! ( & nodes[ 0 ] , nodes[ 8 ] , 50_000_000 ) ;
3923+
3924+ send_along_route_with_secret ( & nodes[ 0 ] , route, & [ & [ & nodes[ 1 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 2 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 3 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 4 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 5 ] , & nodes[ 7 ] , & nodes[ 8 ] ] , & [ & nodes[ 6 ] , & nodes[ 7 ] , & nodes[ 8 ] ] ] , 50_000_000 , payment_hash, payment_secret) ;
39013925
39023926 let ( do_a_write, blocker) = std:: sync:: mpsc:: sync_channel ( 0 ) ;
3903- * nodes[ 6 ] . chain_monitor . write_blocker . lock ( ) . unwrap ( ) = Some ( blocker) ;
3927+ * nodes[ 8 ] . chain_monitor . write_blocker . lock ( ) . unwrap ( ) = Some ( blocker) ;
39043928
39053929 // Until we have std::thread::scoped we have to unsafe { turn off the borrow checker }.
39063930 // We do this by casting a pointer to a `TestChannelManager` to a pointer to a
39073931 // `TestChannelManager` with different (in this case 'static) lifetime.
39083932 // This is even suggested in the second example at
39093933 // https://doc.rust-lang.org/std/mem/fn.transmute.html#examples
39103934 let claim_node: & ' static TestChannelManager < ' static , ' static > =
3911- unsafe { std:: mem:: transmute ( nodes[ 6 ] . node as & TestChannelManager ) } ;
3935+ unsafe { std:: mem:: transmute ( nodes[ 8 ] . node as & TestChannelManager ) } ;
39123936 let thrd = std:: thread:: spawn ( move || {
39133937 // Initiate the claim in a background thread as it will immediately block waiting on the
39143938 // `write_blocker` we set above.
@@ -3924,85 +3948,125 @@ fn test_single_channel_multiple_mpp() {
39243948 // `claim_funds` is holding. Thus, we release a second write after a small sleep in the
39253949 // background to give `claim_funds` a chance to step forward, unblocking
39263950 // `get_and_clear_pending_msg_events`.
3927- const MAX_THREAD_INIT_TIME : std:: time:: Duration = std:: time:: Duration :: from_millis ( 10 ) ;
39283951 let do_a_write_background = do_a_write. clone ( ) ;
3952+ let ( start_thrd2, block_thrd2) = std:: sync:: mpsc:: sync_channel ( 0 ) ;
39293953 let thrd2 = std:: thread:: spawn ( move || {
3954+ block_thrd2. recv ( ) . unwrap ( ) ;
3955+ std:: thread:: sleep ( MAX_THREAD_INIT_TIME ) ;
3956+ do_a_write_background. send ( ( ) ) . unwrap ( ) ;
39303957 std:: thread:: sleep ( MAX_THREAD_INIT_TIME ) ;
39313958 do_a_write_background. send ( ( ) ) . unwrap ( ) ;
39323959 } ) ;
3933- let first_updates = get_htlc_update_msgs ( & nodes[ 6 ] , & nodes[ 5 ] . node . get_our_node_id ( ) ) ;
3960+ start_thrd2. send ( ( ) ) . unwrap ( ) ;
3961+ let first_updates = get_htlc_update_msgs ( & nodes[ 8 ] , & nodes[ 7 ] . node . get_our_node_id ( ) ) ;
39343962 thrd2. join ( ) . unwrap ( ) ;
39353963
3936- nodes[ 5 ] . node . peer_disconnected ( nodes[ 1 ] . node . get_our_node_id ( ) ) ;
3937- nodes[ 5 ] . node . peer_disconnected ( nodes[ 2 ] . node . get_our_node_id ( ) ) ;
3938- nodes[ 5 ] . node . peer_disconnected ( nodes[ 3 ] . node . get_our_node_id ( ) ) ;
3939- nodes[ 5 ] . node . peer_disconnected ( nodes[ 4 ] . node . get_our_node_id ( ) ) ;
3940-
3941- nodes[ 5 ] . node . handle_update_fulfill_htlc ( node_6_id, & first_updates. update_fulfill_htlcs [ 0 ] ) ;
3942- check_added_monitors ( & nodes[ 5 ] , 1 ) ;
3943- expect_payment_forwarded ! ( nodes[ 5 ] , nodes[ 1 ] , nodes[ 6 ] , Some ( 1000 ) , false , false ) ;
3944- nodes[ 5 ] . node . handle_commitment_signed ( node_6_id, & first_updates. commitment_signed ) ;
3945- check_added_monitors ( & nodes[ 5 ] , 1 ) ;
3946- let ( raa, cs) = get_revoke_commit_msgs ( & nodes[ 5 ] , & node_6_id) ;
3964+ // Disconnect node 6 from all its peers so it doesn't bother to fail the HTLCs back
3965+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 1 ] . node . get_our_node_id ( ) ) ;
3966+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 2 ] . node . get_our_node_id ( ) ) ;
3967+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 3 ] . node . get_our_node_id ( ) ) ;
3968+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 4 ] . node . get_our_node_id ( ) ) ;
3969+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 5 ] . node . get_our_node_id ( ) ) ;
3970+ nodes[ 7 ] . node . peer_disconnected ( nodes[ 6 ] . node . get_our_node_id ( ) ) ;
3971+
3972+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & first_updates. update_fulfill_htlcs [ 0 ] ) ;
3973+ check_added_monitors ( & nodes[ 7 ] , 1 ) ;
3974+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ 1 ] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
3975+ nodes[ 7 ] . node . handle_commitment_signed ( node_8_id, & first_updates. commitment_signed ) ;
3976+ check_added_monitors ( & nodes[ 7 ] , 1 ) ;
3977+ let ( raa, cs) = get_revoke_commit_msgs ( & nodes[ 7 ] , & node_8_id) ;
39473978
39483979 // Now, handle the `revoke_and_ack` from node 5. Note that `claim_funds` is still blocked on
39493980 // our peer lock, so we have to release a write to let it process.
3981+ // After this call completes, the channel previously would be locked up and should not be able
3982+ // to make further progress.
39503983 let do_a_write_background = do_a_write. clone ( ) ;
3984+ let ( start_thrd3, block_thrd3) = std:: sync:: mpsc:: sync_channel ( 0 ) ;
39513985 let thrd3 = std:: thread:: spawn ( move || {
3986+ block_thrd3. recv ( ) . unwrap ( ) ;
3987+ std:: thread:: sleep ( MAX_THREAD_INIT_TIME ) ;
3988+ do_a_write_background. send ( ( ) ) . unwrap ( ) ;
39523989 std:: thread:: sleep ( MAX_THREAD_INIT_TIME ) ;
39533990 do_a_write_background. send ( ( ) ) . unwrap ( ) ;
39543991 } ) ;
3955- nodes[ 6 ] . node . handle_revoke_and_ack ( node_5_id, & raa) ;
3992+ start_thrd3. send ( ( ) ) . unwrap ( ) ;
3993+ nodes[ 8 ] . node . handle_revoke_and_ack ( node_7_id, & raa) ;
39563994 thrd3. join ( ) . unwrap ( ) ;
3995+ assert ! ( !thrd. is_finished( ) ) ;
39573996
39583997 let thrd4 = std:: thread:: spawn ( move || {
3959- std:: thread:: sleep ( MAX_THREAD_INIT_TIME ) ;
39603998 do_a_write. send ( ( ) ) . unwrap ( ) ;
39613999 do_a_write. send ( ( ) ) . unwrap ( ) ;
39624000 } ) ;
39634001
39644002 thrd4. join ( ) . unwrap ( ) ;
39654003 thrd. join ( ) . unwrap ( ) ;
39664004
3967- expect_payment_claimed ! ( nodes[ 6 ] , payment_hash, 30_000_000 ) ;
4005+ expect_payment_claimed ! ( nodes[ 8 ] , payment_hash, 50_000_000 ) ;
39684006
3969- // At the end, we should have 5 ChannelMonitorUpdates - 4 for HTLC claims, and one for the
4007+ // At the end, we should have 7 ChannelMonitorUpdates - 6 for HTLC claims, and one for the
39704008 // above `revoke_and_ack`.
3971- check_added_monitors ( & nodes[ 6 ] , 5 ) ;
3972-
3973- // Now drive everything to the end, at least as far as node 6 is concerned...
3974- * nodes[ 6 ] . chain_monitor . write_blocker . lock ( ) . unwrap ( ) = None ;
3975- nodes[ 6 ] . node . handle_commitment_signed ( node_5_id, & cs) ;
3976- check_added_monitors ( & nodes[ 6 ] , 1 ) ;
3977-
3978- let ( updates, raa) = get_updates_and_revoke ( & nodes[ 6 ] , & nodes[ 5 ] . node . get_our_node_id ( ) ) ;
3979- nodes[ 5 ] . node . handle_update_fulfill_htlc ( node_6_id, & updates. update_fulfill_htlcs [ 0 ] ) ;
3980- expect_payment_forwarded ! ( nodes[ 5 ] , nodes[ 2 ] , nodes[ 6 ] , Some ( 1000 ) , false , false ) ;
3981- nodes[ 5 ] . node . handle_update_fulfill_htlc ( node_6_id, & updates. update_fulfill_htlcs [ 1 ] ) ;
3982- expect_payment_forwarded ! ( nodes[ 5 ] , nodes[ 3 ] , nodes[ 6 ] , Some ( 1000 ) , false , false ) ;
3983- nodes[ 5 ] . node . handle_commitment_signed ( node_6_id, & updates. commitment_signed ) ;
3984- nodes[ 5 ] . node . handle_revoke_and_ack ( node_6_id, & raa) ;
3985- check_added_monitors ( & nodes[ 5 ] , 4 ) ;
3986-
3987- let ( raa, cs) = get_revoke_commit_msgs ( & nodes[ 5 ] , & node_6_id) ;
3988-
3989- nodes[ 6 ] . node . handle_revoke_and_ack ( node_5_id, & raa) ;
3990- nodes[ 6 ] . node . handle_commitment_signed ( node_5_id, & cs) ;
3991- check_added_monitors ( & nodes[ 6 ] , 2 ) ;
3992-
3993- let ( updates, raa) = get_updates_and_revoke ( & nodes[ 6 ] , & nodes[ 5 ] . node . get_our_node_id ( ) ) ;
3994- nodes[ 5 ] . node . handle_update_fulfill_htlc ( node_6_id, & updates. update_fulfill_htlcs [ 0 ] ) ;
3995- expect_payment_forwarded ! ( nodes[ 5 ] , nodes[ 4 ] , nodes[ 6 ] , Some ( 1000 ) , false , false ) ;
3996- nodes[ 5 ] . node . handle_commitment_signed ( node_6_id, & updates. commitment_signed ) ;
3997- nodes[ 5 ] . node . handle_revoke_and_ack ( node_6_id, & raa) ;
3998- check_added_monitors ( & nodes[ 5 ] , 3 ) ;
3999-
4000- let ( raa, cs) = get_revoke_commit_msgs ( & nodes[ 5 ] , & node_6_id) ;
4001- nodes[ 6 ] . node . handle_revoke_and_ack ( node_5_id, & raa) ;
4002- nodes[ 6 ] . node . handle_commitment_signed ( node_5_id, & cs) ;
4003- check_added_monitors ( & nodes[ 6 ] , 2 ) ;
4004-
4005- let raa = get_event_msg ! ( nodes[ 6 ] , MessageSendEvent :: SendRevokeAndACK , node_5_id) ;
4006- nodes[ 5 ] . node . handle_revoke_and_ack ( node_6_id, & raa) ;
4007- check_added_monitors ( & nodes[ 5 ] , 1 ) ;
4009+ check_added_monitors ( & nodes[ 8 ] , 7 ) ;
4010+
4011+ // Now drive everything to the end, at least as far as node 7 is concerned...
4012+ * nodes[ 8 ] . chain_monitor . write_blocker . lock ( ) . unwrap ( ) = None ;
4013+ nodes[ 8 ] . node . handle_commitment_signed ( node_7_id, & cs) ;
4014+ check_added_monitors ( & nodes[ 8 ] , 1 ) ;
4015+
4016+ let ( updates, raa) = get_updates_and_revoke ( & nodes[ 8 ] , & nodes[ 7 ] . node . get_our_node_id ( ) ) ;
4017+
4018+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & updates. update_fulfill_htlcs [ 0 ] ) ;
4019+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ 2 ] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4020+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & updates. update_fulfill_htlcs [ 1 ] ) ;
4021+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ 3 ] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4022+ let mut next_source = 4 ;
4023+ if let Some ( update) = updates. update_fulfill_htlcs . get ( 2 ) {
4024+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, update) ;
4025+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ 4 ] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4026+ next_source += 1 ;
4027+ }
4028+
4029+ nodes[ 7 ] . node . handle_commitment_signed ( node_8_id, & updates. commitment_signed ) ;
4030+ nodes[ 7 ] . node . handle_revoke_and_ack ( node_8_id, & raa) ;
4031+ if updates. update_fulfill_htlcs . get ( 2 ) . is_some ( ) {
4032+ check_added_monitors ( & nodes[ 7 ] , 5 ) ;
4033+ } else {
4034+ check_added_monitors ( & nodes[ 7 ] , 4 ) ;
4035+ }
4036+
4037+ let ( raa, cs) = get_revoke_commit_msgs ( & nodes[ 7 ] , & node_8_id) ;
4038+
4039+ nodes[ 8 ] . node . handle_revoke_and_ack ( node_7_id, & raa) ;
4040+ nodes[ 8 ] . node . handle_commitment_signed ( node_7_id, & cs) ;
4041+ check_added_monitors ( & nodes[ 8 ] , 2 ) ;
4042+
4043+ let ( updates, raa) = get_updates_and_revoke ( & nodes[ 8 ] , & node_7_id) ;
4044+
4045+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & updates. update_fulfill_htlcs [ 0 ] ) ;
4046+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ next_source] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4047+ next_source += 1 ;
4048+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, & updates. update_fulfill_htlcs [ 1 ] ) ;
4049+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ next_source] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4050+ next_source += 1 ;
4051+ if let Some ( update) = updates. update_fulfill_htlcs . get ( 2 ) {
4052+ nodes[ 7 ] . node . handle_update_fulfill_htlc ( node_8_id, update) ;
4053+ expect_payment_forwarded ! ( nodes[ 7 ] , nodes[ next_source] , nodes[ 8 ] , Some ( 1000 ) , false , false ) ;
4054+ }
4055+
4056+ nodes[ 7 ] . node . handle_commitment_signed ( node_8_id, & updates. commitment_signed ) ;
4057+ nodes[ 7 ] . node . handle_revoke_and_ack ( node_8_id, & raa) ;
4058+ if updates. update_fulfill_htlcs . get ( 2 ) . is_some ( ) {
4059+ check_added_monitors ( & nodes[ 7 ] , 5 ) ;
4060+ } else {
4061+ check_added_monitors ( & nodes[ 7 ] , 4 ) ;
4062+ }
4063+
4064+ let ( raa, cs) = get_revoke_commit_msgs ( & nodes[ 7 ] , & node_8_id) ;
4065+ nodes[ 8 ] . node . handle_revoke_and_ack ( node_7_id, & raa) ;
4066+ nodes[ 8 ] . node . handle_commitment_signed ( node_7_id, & cs) ;
4067+ check_added_monitors ( & nodes[ 8 ] , 2 ) ;
4068+
4069+ let raa = get_event_msg ! ( nodes[ 8 ] , MessageSendEvent :: SendRevokeAndACK , node_7_id) ;
4070+ nodes[ 7 ] . node . handle_revoke_and_ack ( node_8_id, & raa) ;
4071+ check_added_monitors ( & nodes[ 7 ] , 1 ) ;
40084072}
0 commit comments