Skip to content

Commit dd313a6

Browse files
PS-9244 postfix: binlog_server fails to resume copying binlog file after server restart (#51)
https://perconadev.atlassian.net/browse/PS-9244

Completely reworked the logic of the 'binsrv::event::reader_context' class: it is now implemented as a state machine that expects the following sequence of events within the binlogs:

(ROTATE(artificial) FORMAT_DESCRIPTION <ANY>* (ROTATE|STOP)?)+

We now properly handle the case when, after an improper MySQL server termination, the last used binary log ends up without a STOP or ROTATE event as its last one.

The 'binlog_streaming.pull_mode' MTR test case now also checks for killing the server while the Binlog Server Utility is running in the background in 'pull' mode.
1 parent 713fd54 commit dd313a6

7 files changed

Lines changed: 254 additions & 149 deletions

File tree

mtr/binlog_streaming/r/pull_mode.result

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,29 +24,38 @@ INSERT INTO t1 VALUES(DEFAULT);
2424

2525
*** Determining the second binary log name.
2626

27-
*** Filling the table with some more data and dropping the table.
27+
*** Filling the table with some more data.
28+
INSERT INTO t1 VALUES(DEFAULT);
29+
30+
*** Killing the server and restarting it after a pause to test for the
31+
*** missing ROTATE / STOP event at the end of the binary log .
32+
# Kill the server
33+
# restart
34+
35+
*** Determining the third binary log name.
36+
37+
*** Filling the table with some more data again and dropping the table.
2838
INSERT INTO t1 VALUES(DEFAULT);
2939
DROP TABLE t1;
3040

31-
*** FLUSHING the binlog one more time to make sure that the second one
41+
*** FLUSHING the binlog one more time to make sure that the third one
3242
*** is no longer open.
3343
FLUSH BINARY LOGS;
3444

35-
*** Determining the third binary log name.
45+
*** Determining the fourth binary log name.
3646

37-
*** Waiting till Binlog Server Utility starts processing the third
47+
*** Waiting till Binlog Server Utility starts processing the fourth
3848
*** binary log.
3949

4050
*** Sending SIGTERM signal to the Binlog Server Utility and waiting for
4151
*** the process to terminate
4252

43-
*** Checking that the Binlog Server utility detected an empty storage
44-
include/assert_grep.inc [Binlog storage must be initialized on an empty directory]
45-
4653
*** Comparing server and downloaded versions of the first binlog file
4754

4855
*** Comparing server and downloaded versions of the second binlog file
4956

57+
*** Comparing server and downloaded versions of the third binlog file
58+
5059
*** Removing the Binlog Server utility storage directory.
5160

5261
*** Removing the Binlog Server utility log file.

mtr/binlog_streaming/t/pull_mode.test

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -60,30 +60,47 @@ INSERT INTO t1 VALUES(DEFAULT);
6060
--let $second_binlog = query_get_value(SHOW MASTER STATUS, File, 1)
6161

6262
--echo
63-
--echo *** Filling the table with some more data and dropping the table.
63+
--echo *** Filling the table with some more data.
64+
INSERT INTO t1 VALUES(DEFAULT);
65+
66+
--echo
67+
--echo *** Killing the server and restarting it after a pause to test for the
68+
--echo *** missing ROTATE / STOP event at the end of the binary log .
69+
--source include/kill_mysqld.inc
70+
# Sleeping here deliberately so that the Binlog Server Utility encounters a
71+
# read timeout and tries to reconnect several times.
72+
--sleep 10
73+
--source include/start_mysqld.inc
74+
75+
--echo
76+
--echo *** Determining the third binary log name.
77+
--let $third_binlog = query_get_value(SHOW MASTER STATUS, File, 1)
78+
79+
--echo
80+
--echo *** Filling the table with some more data again and dropping the table.
6481
INSERT INTO t1 VALUES(DEFAULT);
6582
DROP TABLE t1;
6683

6784
--echo
68-
--echo *** FLUSHING the binlog one more time to make sure that the second one
85+
--echo *** FLUSHING the binlog one more time to make sure that the third one
6986
--echo *** is no longer open.
7087
FLUSH BINARY LOGS;
7188

7289
--echo
73-
--echo *** Determining the third binary log name.
74-
--let $third_binlog = query_get_value(SHOW MASTER STATUS, File, 1)
90+
--echo *** Determining the fourth binary log name.
91+
--let $fourth_binlog = query_get_value(SHOW MASTER STATUS, File, 1)
7592

7693
--echo
77-
--echo *** Waiting till Binlog Server Utility starts processing the third
94+
--echo *** Waiting till Binlog Server Utility starts processing the fourth
7895
--echo *** binary log.
7996
# We grep the Binlog Server Utility log file in a loop until we encounter the
80-
# third binary log file name.
97+
# fourth binary log file name.
8198
--let $max_number_of_attempts = 60
8299
--let $iteration = 0
83100
while($iteration < $max_number_of_attempts)
84101
{
85102
--error 0, 1
86-
--exec grep --silent $third_binlog $binsrv_log_path
103+
--exec grep --silent $fourth_binlog $binsrv_log_path
87104
--let $grep_status = $__error
88105
if ($grep_status == 0)
89106
{
@@ -97,7 +114,7 @@ while($iteration < $max_number_of_attempts)
97114
}
98115
if ($grep_status != 0)
99116
{
100-
--die The Binlog Server Utility did not start processing the third binary log.
117+
--die The Binlog Server Utility did not start processing the fourth binary log.
101118
}
102119

103120
--echo
@@ -121,14 +138,6 @@ EOF
121138

122139
--remove_file $binsrv_pid_file
123140

124-
--echo
125-
--echo *** Checking that the Binlog Server utility detected an empty storage
126-
--let $assert_text = Binlog storage must be initialized on an empty directory
127-
--let $assert_file = $binsrv_log_path
128-
--let $assert_count = 1
129-
--let $assert_select = binlog storage initialized on an empty directory
130-
--source include/assert_grep.inc
131-
132141
--echo
133142
--echo *** Comparing server and downloaded versions of the first binlog file
134143
--let $local_file = $binlog_base_dir/$first_binlog
@@ -141,6 +150,12 @@ EOF
141150
--let $storage_object = $binsrv_storage_path/$second_binlog
142151
--source ../include/diff_with_storage_object.inc
143152

153+
--echo
154+
--echo *** Comparing server and downloaded versions of the third binlog file
155+
--let $local_file = $binlog_base_dir/$third_binlog
156+
--let $storage_object = $binsrv_storage_path/$third_binlog
157+
--source ../include/diff_with_storage_object.inc
158+
144159
# cleaning up
145160
--source ../include/tear_down_binsrv_environment.inc
146161

src/app.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "binsrv/storage.hpp"
4646
#include "binsrv/storage_backend_factory.hpp"
4747

48+
#include "binsrv/event//checksum_algorithm_type.hpp"
4849
#include "binsrv/event/code_type.hpp"
4950
#include "binsrv/event/event.hpp"
5051
#include "binsrv/event/flag_type.hpp"
@@ -222,6 +223,12 @@ void process_binlog_event(const binsrv::event::event &current_event,
222223

223224
skip_open_binlog = false;
224225
} else {
226+
// if the server was not shut down properly, its last binlog won't end
227+
// with a ROTATE or STOP event, so here we handle this case by closing
228+
// the old binlog (if it is still open) and opening a new one
229+
if (storage.is_binlog_open()) {
230+
storage.close_binlog();
231+
}
225232
storage.open_binlog(current_rotate_body.get_binlog());
226233
}
227234
}
@@ -291,7 +298,11 @@ void receive_binlog_events(
291298

292299
util::const_byte_span portion;
293300

294-
binsrv::event::reader_context context{};
301+
// TODO: change this checksum algorithm to the value of the
302+
// @source_binlog_checksum variable that we set in the
303+
// 'connection::switch_to_replication()'
304+
binsrv::event::reader_context context{
305+
binsrv::event::checksum_algorithm_type::off};
295306

296307
// if binlog is still open, there is no sense to close it and re-open
297308
// instead, we will just instruct this loop to process the

src/binsrv/event/event.cpp

Lines changed: 8 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727

2828
#include "binsrv/event/checksum_algorithm_type.hpp"
2929
#include "binsrv/event/code_type.hpp"
30-
#include "binsrv/event/flag_type.hpp"
3130
#include "binsrv/event/generic_body.hpp"
3231
#include "binsrv/event/generic_post_header.hpp"
3332
#include "binsrv/event/protocol_traits_fwd.hpp"
@@ -59,18 +58,12 @@ event::event(reader_context &context, util::const_byte_span portion)
5958
// format_description_events always include event footers with checksums
6059
footer_size = footer::size_in_bytes;
6160
} else {
62-
if (context.has_fde_processed()) {
63-
// if format_description event has already been encountered, we determine
64-
// whether there is a footer in the event from it
65-
footer_size = (context.get_current_checksum_algorithm() ==
66-
checksum_algorithm_type::crc32
67-
? footer::size_in_bytes
68-
: 0U);
69-
} else {
70-
// we get in this branch only for the very first artificial rotate event
71-
// and in this case it does not include the footer
72-
footer_size = 0U;
73-
}
61+
// we determine whether there is a footer in the event from the
62+
// reader_context
63+
footer_size = (context.get_current_checksum_algorithm() ==
64+
checksum_algorithm_type::crc32
65+
? footer::size_in_bytes
66+
: 0U);
7467
}
7568

7669
const std::size_t event_size = std::size(portion);
@@ -80,31 +73,8 @@ event::event(reader_context &context, util::const_byte_span portion)
8073
"header");
8174
}
8275
std::size_t post_header_size{0U};
83-
if (context.has_fde_processed()) {
84-
// if format_description event has already been encountered in the stream,
85-
// we take post-header length from it
86-
post_header_size = context.get_current_post_header_length(code);
87-
} else {
88-
// we expect that we can receive only 2 events before there is a
89-
// format_description event we can refer to: rotate (with artificial
90-
// flag) and format description event itself
91-
post_header_size = get_expected_post_header_length(code);
92-
switch (code) {
93-
case code_type::rotate:
94-
if (!common_header_.get_flags().has_element(flag_type::artificial)) {
95-
util::exception_location().raise<std::logic_error>(
96-
"rotate event without preceding format_description event must have "
97-
"'artificial' flag set");
98-
}
99-
break;
100-
case code_type::format_description:
101-
break;
102-
default:
103-
util::exception_location().raise<std::logic_error>(
104-
"this type of event must be preceded by a format_description event");
105-
}
106-
assert(post_header_size != unspecified_post_header_length);
107-
}
76+
post_header_size = context.get_current_post_header_length(code);
77+
assert(post_header_size != unspecified_post_header_length);
10878

10979
const std::size_t group_size =
11080
common_header::size_in_bytes + post_header_size + footer_size;

0 commit comments

Comments
 (0)