9898 consistent_query_ref ()}),
9999 pending_consistent_queries := [consistent_query_ref ()],
100100 commit_latency => option (non_neg_integer ()),
101- snapshot_phase => chunk_flag (),
101+ % % snapshot_phase tracks the current phase of snapshot reception:
102+ % % - chunk_flag() for normal phases (init, pre, next, last)
103+ % % - {awaiting_pending, EventType, Rpc} when waiting for pending WAL
104+ % % writes to complete before finalizing snapshot installation
105+ snapshot_phase => chunk_flag () |
106+ {awaiting_pending , term (), # install_snapshot_rpc {}},
102107 pending_release_cursor => {ra_index (), term (),
103108 [ra_machine :release_cursor_condition ()]},
104- % % TODO: review these 3, specific to handle_receive_snapshot
105- % % could temporarily increase state size over small map (32)
109+ % % temporary state for handle_receive_snapshot
106110 current_event_type => term (),
107- snapshot_has_live_indexes => boolean (),
108- snapshot_next_event => term ()
111+ snapshot_has_live_indexes => boolean ()
109112 }.
110113
%% state() is declared as an alias of ra_server_state().
111114-type state () :: ra_server_state ().
@@ -1621,7 +1624,7 @@ handle_receive_snapshot(#install_snapshot_rpc{term = Term,
16211624 % % works as an assertion also
16221625 {AcceptingSnapIdx , _ } = ra_snapshot :accepting (SnapState0 ),
16231626 SnapshotHasLiveIndexes = maps :get (snapshot_has_live_indexes , State0 , false ),
1624- LogHasPendingIndexes = maps : get ( num_pending , ra_log :overview (Log00 )) > 0 ,
1627+ LogHasPendingIndexes = ra_log :has_pending (Log00 ),
16251628 case ChunkFlag of
16261629 init when SnapPhase == init andalso
16271630 SnapIndex == AcceptingSnapIdx ->
@@ -1673,16 +1676,14 @@ handle_receive_snapshot(#install_snapshot_rpc{term = Term,
16731676 when SnapshotHasLiveIndexes andalso
16741677 LogHasPendingIndexes ->
16751678 % % we cannot yet complete the snapshot as there are pending
1676- % % log indexes
1679+ % % log indexes, defer completion until they are written
16771680 EventType = maps :get (current_event_type , State0 ),
16781681 ? DEBUG (" ~ts : receiving snapshot chunk: ~b / ~w , index ~b , term ~b "
16791682 " cannot yet complete as log has pending indexes" ,
16801683 [LogId , Num , ChunkFlag , SnapIndex , SnapTerm ]),
16811684 {receive_snapshot ,
1682- State0 #{snapshot_next_event => {EventType , Rpc }}, []};
1685+ State0 #{snapshot_phase => {awaiting_pending , EventType , Rpc }}, []};
16831686 last ->
1684- % % TODO: we can't do this bit until all pending log entries have
1685- % % been processed
16861687 ? assert (SnapIndex == AcceptingSnapIdx ),
16871688 ? DEBUG (" ~ts : receiving snapshot chunk: ~b / ~w , index ~b , term ~b " ,
16881689 [LogId , Num , ChunkFlag , SnapIndex , SnapTerm ]),
@@ -1737,7 +1738,7 @@ handle_receive_snapshot(#install_snapshot_rpc{term = Term,
17371738 machine_state => MacState }),
17381739 State = maps :without ([snapshot_phase ,
17391740 snapshot_has_live_indexes ,
1740- snapshot_next_event ], State1 ),
1741+ current_event_type ], State1 ),
17411742 put_counter (Cfg , ? C_RA_SVR_METRIC_LAST_APPLIED , SnapIndex ),
17421743 % % it was the last snapshot chunk so we can revert back to
17431744 % % follower status
@@ -1757,15 +1758,27 @@ handle_receive_snapshot(#append_entries_rpc{term = Term} = Msg,
17571758 State = abort_receive (State0 ),
17581759 {follower , update_term (Term , State ), [{next_event , Msg }]};
%% handle_receive_snapshot/2 clause for {ra_log_event, Evt}.
%%
%% The event is always passed to ra_log:handle_event/2 and the returned log
%% is stored back into the state under the log key.
%%
%% On the added (+) side of this diff the clause additionally inspects
%% snapshot_phase:
%%  - {awaiting_pending, EventType, DeferredRpc} and ra_log:has_pending/1
%%    returns false for the updated log: snapshot_phase is set to next and
%%    {next_event, EventType, DeferredRpc} is prepended to the effects
%%    returned by ra_log.
%%  - {awaiting_pending, _, _} and has_pending/1 returns true: only the log
%%    is updated; the phase (and the deferred rpc inside it) is kept as is.
%%  - any other phase: only the log is updated.
%% Every branch returns the receive_snapshot state name.
%%
%% On the removed (-) side the deferred event was taken from the
%% snapshot_next_event key and replayed on the first ra_log_event received,
%% without checking whether the log still had pending entries.
%%
%% NOTE(review): the added head matches snapshot_phase with `:=`, so a state
%% map without that key will not match this clause. Confirm that every path
%% into receive_snapshot sets snapshot_phase, otherwise ra_log events would
%% fall through to a later clause.
17591760handle_receive_snapshot ({ra_log_event , Evt },
1760- #{cfg := # cfg {log_id = _LogId },
1761+ #{cfg := # cfg {log_id = LogId },
1762+ snapshot_phase := Phase ,
17611763 log := Log0 } = State0 ) ->
1762- % simply forward all other events to ra_log
1763- % whilst the snapshot is being received
1764+ % % forward log events to ra_log whilst the snapshot is being received
17641765 {Log , Effects } = ra_log :handle_event (Evt , Log0 ),
1765- case maps :take (snapshot_next_event , State0 ) of
1766- {{EventType , NextEvt }, State } ->
1767- {receive_snapshot , State #{log => Log },
1768- [{next_event , EventType , NextEvt } | Effects ]};
1766+ case Phase of
1767+ {awaiting_pending , EventType , DeferredRpc } ->
1768+
%% has_pending/1 is evaluated on Log (the log returned by handle_event/2
%% above), not on Log0
1769+ case ra_log :has_pending (Log ) of
1770+ false ->
1771+ ? DEBUG (" ~ts : pending indexes cleared, completing snapshot" ,
1772+ [LogId ]),
1773+ % % replay the deferred last chunk, set phase to next so
1774+ % % the replayed event doesn't hit the awaiting_pending guard
1775+ {receive_snapshot ,
1776+ State0 #{log => Log ,
1777+ snapshot_phase => next },
1778+ [{next_event , EventType , DeferredRpc } | Effects ]};
1779+ true ->
%% still pending: keep waiting, State0 retains the awaiting_pending phase
1780+ {receive_snapshot , State0 #{log => Log }, Effects }
1781+ end ;
17691782 _ ->
17701783 {receive_snapshot , State0 #{log => Log }, Effects }
17711784 end ;
@@ -1824,23 +1837,24 @@ handle_receive_snapshot(Msg, State) ->
18241837 % % TODO: work out what else to handle
18251838 {receive_snapshot , State , [{reply , {error , {unsupported_call , Msg }}}]}.
18261839
%% abort_receive/1: abandons an in-progress snapshot reception.
%%
%% Steps visible in this block:
%%  1. read the snapshot state from the log, pass it through
%%     ra_snapshot:abort_accept/1 and write the result back with
%%     ra_log:set_snapshot_state/2;
%%  2. optionally reset the log's last index to last_applied via
%%     ra_log:set_last_index/2;
%%  3. drop the temporary snapshot-reception keys from the state map and
%%     pass the result to clear_leader_id/1.
%%
%% The condition for step 2 differs between the two sides of this diff:
%% the removed (-) side reset the index only when snapshot_phase was `pre`;
%% the added (+) side resets it whenever snapshot_has_live_indexes is true
%% in State, whatever the phase.
%%
%% On the added side _Phase is bound but never used; the pattern still
%% requires the snapshot_phase key to be present in the state map.
1827- abort_receive (#{snapshot_phase := Phase ,
1840+ abort_receive (#{snapshot_phase := _Phase ,
18281841 last_applied := LastApplied ,
18291842 log := Log0 } = State ) ->
18301843 SnapState0 = ra_log :snapshot_state (Log0 ),
18311844 SnapState = ra_snapshot :abort_accept (SnapState0 ),
18321845 Log1 = ra_log :set_snapshot_state (SnapState , Log0 ),
1833- Log = case Phase of
1834- pre ->
1846+ Log = case State of
1847+ #{snapshot_has_live_indexes := true } ->
1848+ % % live indexes were written during pre phase,
1849+ % % reset log index to undo them
%% assertive match: any return other than {ok, _} crashes with badmatch
18351850 {ok , Log2 } = ra_log :set_last_index (LastApplied , Log1 ),
18361851 Log2 ;
18371852 _ ->
18381853 Log1
18391854 end ,
%% the added side no longer lists snapshot_next_event among the removed keys
18401855 clear_leader_id (maps :without ([snapshot_phase ,
18411856 snapshot_has_live_indexes ,
1842- current_event_type ,
1843- snapshot_next_event ],
1857+ current_event_type ],
18441858 State #{log => Log })).
18451859
18461860-spec handle_await_condition (ra_msg (), ra_server_state ()) ->
0 commit comments