@@ -1830,8 +1830,15 @@ compact_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
18301830 % % Open the file.
18311831 FileName = filenum_to_name (File ),
18321832 {ok , Fd } = file :open (form_filename (Dir , FileName ), [read , write , binary , raw ]),
1833- % % Load the messages.
1834- Messages = load_and_vacuum_message_file (File , State ),
1833+ % % Load the messages. It's possible to get 0 messages here;
1834+ % % that's OK. That means we have little to do as the file is
1835+ % % about to be deleted.
1836+ {Messages , _ } = scan_and_vacuum_message_file (File , State ),
1837+ % % Blank holes. We must do this first otherwise the file is left
1838+ % % with data that may confuse the code (for example data that looks
1839+ % % like a message, isn't a message, but spans over a real message).
1840+ % % We blank more than is likely required but better safe than sorry.
1841+ blank_holes_in_file (Fd , Messages ),
18351842 % % Compact the file.
18361843 {ok , TruncateSize , IndexUpdates } = do_compact_file (Fd , 0 , Messages , lists :reverse (Messages ), []),
18371844 % % Sync and close the file.
@@ -1876,6 +1883,32 @@ compact_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
18761883 garbage_collect (),
18771884 ok .
18781885
1886+ % % We must special case the blanking of the beginning of the file.
1887+ blank_holes_in_file (Fd , [# msg_location { offset = Offset }|Tail ])
1888+ when Offset =/= 0 ->
1889+ Bytes = <<0 :Offset /unit :8 >>,
1890+ ok = file :pwrite (Fd , 0 , Bytes ),
1891+ blank_holes_in_file1 (Fd , Tail );
1892+ blank_holes_in_file (Fd , Messages ) ->
1893+ blank_holes_in_file1 (Fd , Messages ).
1894+
1895+ blank_holes_in_file1 (Fd , [
1896+ # msg_location { offset = OneOffset , total_size = OneSize },
1897+ # msg_location { offset = TwoOffset } = Two
1898+ |Tail ]) when OneOffset + OneSize < TwoOffset ->
1899+ Offset = OneOffset + OneSize ,
1900+ Size = TwoOffset - Offset ,
1901+ Bytes = <<0 :Size /unit :8 >>,
1902+ ok = file :pwrite (Fd , Offset , Bytes ),
1903+ blank_holes_in_file1 (Fd , [Two |Tail ]);
1904+ % % We either have only one message left, or contiguous messages.
1905+ blank_holes_in_file1 (Fd , [_ |Tail ]) ->
1906+ blank_holes_in_file1 (Fd , Tail );
1907+ % % No need to blank the hole past the last message as we will
1908+ % % not write there (no confusion possible) and truncate afterwards.
1909+ blank_holes_in_file1 (_ , []) ->
1910+ ok .
1911+
18791912% % If the message at the end fits into the hole we have found, we copy it there.
18801913% % We will do the ets:updates after the data is synced to disk.
18811914do_compact_file (Fd , Offset , Start = [# msg_location { offset = StartMsgOffset }|_ ],
@@ -1962,27 +1995,7 @@ delete_file(File, State = #gc_state { file_summary_ets = FileSummaryEts,
19621995 ok
19631996 end .
19641997
1965- load_and_vacuum_message_file (File , State = # gc_state { dir = Dir }) ->
1966- % % Messages here will be end-of-file at start-of-list
1967- {ok , Messages , _FileSize } =
1968- scan_file_for_valid_messages (Dir , filenum_to_name (File )),
1969- % % foldl will reverse so will end up with msgs in ascending offset order
1970- lists :foldl (
1971- fun ({MsgId , TotalSize , Offset }, Acc ) ->
1972- case index_lookup (MsgId , State ) of
1973- # msg_location { file = File , total_size = TotalSize ,
1974- offset = Offset , ref_count = 0 } = Entry ->
1975- ok = index_delete_object (Entry , State ),
1976- Acc ;
1977- # msg_location { file = File , total_size = TotalSize ,
1978- offset = Offset } = Entry ->
1979- [ Entry | Acc ];
1980- _ ->
1981- Acc
1982- end
1983- end , [], Messages ).
1984-
1985- scan_and_vacuum_message_file (File , State = # gc_state { dir = Dir }) ->
1998+ scan_and_vacuum_message_file (File , State = # gc_state { dir = Dir }) ->
19861999 % % Messages here will be end-of-file at start-of-list
19872000 {ok , Messages , _FileSize } =
19882001 scan_file_for_valid_messages (Dir , filenum_to_name (File )),
@@ -1997,7 +2010,15 @@ scan_and_vacuum_message_file(File, State = #gc_state { dir = Dir }) ->
19972010 # msg_location { file = File , total_size = TotalSize ,
19982011 offset = Offset } = Entry ->
19992012 {[ Entry | List ], TotalSize + Size };
2000- _ ->
2013+ % % Fan-out may remove the entry but also write a new
2014+ % % entry in a different file when it needs to write
2015+ % % a message and the existing reference is in a file
2016+ % % that's about to be deleted. So we explicitly accept
2017+ % % these cases and ignore this message.
2018+ # msg_location { file = OtherFile , total_size = TotalSize }
2019+ when File =/= OtherFile ->
2020+ Acc ;
2021+ not_found ->
20012022 Acc
20022023 end
20032024 end , {[], 0 }, Messages ).
0 commit comments