4343% % archive), but we don't want them to process so many inputs that
4444% % they consume their blocking queues before handing off.
4545
46- % % Please Note: Under rare circumstances, this test may fail with a
47- % % "{badmatch,{error,[{vnode_down,noproc}]}}' error. This is not a
48- % % failure of this test but rather a side effect of a race condition
49- % % in riak_core_vnode_proxy. It manifests due to the fact that the
50- % % test is attempting to send a command to a vnode that is in fact
51- % % down, however monitor only works by issuing a command and getting
52- % % a PID. In some instances, get_vnode_pid fails because vnode shutdown
53- % % is queued up in the mailbox before monitor node. Unfortunately, the
54- % % fix would require a fundamental shift in the architecture of
55- % % riak_core, which at the time of this writing is not feasible for
56- % % this rare failure case.
5746-module (pipe_verify_handoff_blocking ).
5847
5948-export ([
@@ -80,9 +69,12 @@ confirm() ->
8069 Inputs = lists :seq (1 , 20 ),
8170
8271 lager :info (" Start ~b nodes" , [? NODE_COUNT ]),
83- NodeDefs = lists :duplicate (? NODE_COUNT , {current , default }),
72+ Config = [{riak_core , [{ring_creation_size , 8 },
73+ {vnode_management_timer , 1000 },
74+ {handoff_concurrency , 100 },
75+ {vnode_inactivity_timeout , 1000 }]}],
8476 Services = [riak_pipe ],
85- [Primary ,Secondary ] = Nodes = rt :deploy_nodes (NodeDefs , Services ),
77+ [Primary ,Secondary ] = Nodes = rt :deploy_nodes (? NODE_COUNT , Config , Services ),
8678 % % Ensure each node owns 100% of it's own ring
8779 [? assertEqual ([Node ], rt :owners_according_to (Node )) || Node <- Nodes ],
8880
@@ -114,14 +106,18 @@ confirm() ->
114106
115107 lager :info (" Unpause workers" ),
116108 Runner ! go ,
109+ set_up_vnode_crashing_intercept (Primary ),
117110
118111 ok = rt :wait_until_transfers_complete (Nodes ),
119112
120113 FillerInputCount = stop_fillers (Fillers ),
121114
122115 % % if we make it this far, then no filler ever saw the vnode_down
123116 % % error message; otherwise badmatches in queue_filler/4 will have
124- % % halted the test
117+ % % halted the test. Note that we should not see this test fail
118+ % % with `{error,[{vnode_down,noproc}]}}` errors, as the `noproc` case
119+ % % should be handled similarly to the `normal` exit case in
120+ % % `riak_pipe_vnode:queue_work_wait`
125121
126122 _Status2 = pipe_status (Primary , Pipe ),
127123
@@ -139,7 +135,13 @@ confirm() ->
139135 lager :info (" ~s : PASS" , [atom_to_list (? MODULE )]),
140136 pass .
141137
142- % %% queue filling
138+ set_up_vnode_crashing_intercept (Primary ) ->
139+ lager :info (" Add intercept to kill vnode before calling the wait function" ),
140+ rt_intercept :add (Primary , {riak_core_vnode_master ,
141+ [{{command_return_vnode , 4 },
142+ stop_pipe_vnode_after_request_sent }]}).
143+
144+ % % queue filling
143145
144146% % @doc fill pipe vnode queues by repeatedly sending each input in the
145147% % input list until the queue reports timeout.
0 commit comments